In [3]:
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd
np.random.seed(123)  # for reproducibility

from keras.models import Sequential, load_model, model_from_json, Model
from keras.applications.densenet import preprocess_input, DenseNet201
from keras.layers import Dense, Dropout, Activation, Flatten, BatchNormalization
from keras.layers import MaxPooling2D, Conv2D
from keras.utils import np_utils
from keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array, array_to_img
from keras.callbacks import ModelCheckpoint, TensorBoard, ReduceLROnPlateau, EarlyStopping
from keras.optimizers import SGD
from keras.metrics import categorical_accuracy

import tensorflow as tf
# run_opts = tf.RunOptions(report_tensor_allocations_upon_oom = True)

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [4]:
CLASS_MAP = {'antelope': 0,
 'bat': 1,
 'beaver': 2,
 'bobcat': 3,
 'buffalo': 4,
 'chihuahua': 5,
 'chimpanzee': 6,
 'collie': 7,
 'dalmatian': 8,
 'german+shepherd': 9,
 'grizzly+bear': 10,
 'hippopotamus': 11,
 'horse': 12,
 'killer+whale': 13,
 'mole': 14,
 'moose': 15,
 'mouse': 16,
 'otter': 17,
 'ox': 18,
 'persian+cat': 19,
 'raccoon': 20,
 'rat': 21,
 'rhinoceros': 22,
 'seal': 23,
 'siamese+cat': 24,
 'spider+monkey': 25,
 'squirrel': 26,
 'walrus': 27,
 'weasel': 28,
 'wolf': 29}
CLASS_WEIGHTS = [0.6235012 , 1.69270833, 3.25814536, 1.03668262, 0.71507151,
       1.12262522, 0.90845563, 0.6372549 , 1.20705664, 0.63076177,
       0.74328188, 0.93390805, 0.390039  , 2.24525043, 7.22222222,
       0.91036415, 3.49462366, 0.83493899, 0.86493679, 0.88255261,
       1.25240848, 1.96969697, 0.90845563, 0.65162907, 1.27077224,
       2.29276896, 0.53630363, 2.92792793, 2.35507246, 1.07526882]

In [5]:
batch_size = 16
resol = 224

# this is the augmentation configuration we will use for training
train_datagen = ImageDataGenerator(
        rotation_range=15,
        width_shift_range=0.05,
        height_shift_range=0.05,
        shear_range=0.5,
        zoom_range=0.1,
        horizontal_flip=True,
        preprocessing_function=preprocess_input,
        )
# this is the augmentation configuration we will use for testing:
# only rescaling
test_datagen = ImageDataGenerator(horizontal_flip=True,
                                  preprocessing_function=preprocess_input)

train_generator = train_datagen.flow_from_directory(
        'train_data/',  # this is the target directory
        target_size=(resol, resol),
        batch_size=batch_size,
        class_mode='categorical') 


# # this is a similar generator, for validation data
validation_generator = test_datagen.flow_from_directory(
        'test_data/',
        target_size=(resol, resol),
        batch_size=batch_size,
        class_mode='categorical')

Found 10412 images belonging to 30 classes.
Found 2588 images belonging to 30 classes.


In [6]:
def load_top_model_densenet():
    model = Sequential()
    model.add(Dense(1024, activation='relu', input_shape=(1920,)))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))
    model.add(Dense(128, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))
    model.add(Dense(30, activation='softmax'))
    return model

In [8]:
base_model = DenseNet201(include_top=False, pooling='avg')
top = load_top_model_densenet()

x = top(base_model.outputs)
model = Model(inputs=base_model.inputs, outputs=x)

In [9]:
for layer in base_model.layers:
    layer.trainable = False

In [11]:
lr_list = [0.0003, 0.0001, 0.0001, 0.0001, 0.0001]
epoch_list = [20, 10, 10, 10, 10]
for rnd in range(5):
    n_layers = rnd * 5
    if n_layers > 0:
        for layer in base_model.layers[-n_layers:]:
            layer.trainable = True
    n_trainable = sum(m.trainable for m in base_model.layers)
    print(f"\nFinetuning {n_trainable} hidden layers")
    
    opt = SGD(lr=lr_list[rnd], decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(optimizer=opt,
                  loss='categorical_crossentropy',
                  metrics=[categorical_accuracy])
    
    path = f"checkpoints/DenseNet/weights_unfreezed_{n_trainable}."
    model_checkpointer = ModelCheckpoint(filepath=path+"{epoch:02d}-{val_loss:.2f}.hdf5", 
                                        verbose=1, save_best_only=True, save_weights_only=True)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.3,
                                  patience=2, min_lr=0.0000001, verbose=1)
    early_stops = EarlyStopping(monitor='val_loss',
                    patience=15, verbose=1,)
    
    my_callbacks = []
    my_callbacks.append(model_checkpointer)
    my_callbacks.append(reduce_lr)
    my_callbacks.append(early_stops)
    
    history = model.fit_generator(
            train_generator,
            steps_per_epoch=10412 // batch_size, # 10412
            epochs=epoch_list[rnd],
            verbose=1,
            callbacks=my_callbacks,
            validation_data=validation_generator,
            validation_steps=2588 // batch_size, # 2588
            class_weight=CLASS_WEIGHTS,
            )
    pd.DataFrame(history.history).to_csv(f"RESULTS/DenseNet/history_unfreezed_{n_trainable}.csv")
    model.save(f'MODEL/DenseNet/densenet_unfreezed_{n_trainable}.h5') 
    


Finetuning 0 hidden layers
Epoch 1/20

Epoch 00001: val_loss improved from inf to 0.41566, saving model to checkpoints/DenseNet/weights_unfreezed_0.01-0.42.hdf5
Epoch 2/20

Epoch 00002: val_loss improved from 0.41566 to 0.37574, saving model to checkpoints/DenseNet/weights_unfreezed_0.02-0.38.hdf5
Epoch 3/20

Epoch 00003: val_loss improved from 0.37574 to 0.33850, saving model to checkpoints/DenseNet/weights_unfreezed_0.03-0.34.hdf5
Epoch 4/20

Epoch 00004: val_loss improved from 0.33850 to 0.32034, saving model to checkpoints/DenseNet/weights_unfreezed_0.04-0.32.hdf5
Epoch 5/20

Epoch 00005: val_loss improved from 0.32034 to 0.29793, saving model to checkpoints/DenseNet/weights_unfreezed_0.05-0.30.hdf5
Epoch 6/20

Epoch 00006: val_loss did not improve from 0.29793
Epoch 7/20

Epoch 00007: val_loss improved from 0.29793 to 0.29345, saving model to checkpoints/DenseNet/weights_unfreezed_0.07-0.29.hdf5
Epoch 8/20

Epoch 00008: val_loss improved from 0.29345 to 0.28791, saving model to c


Epoch 00010: ReduceLROnPlateau reducing learning rate to 2.6999998226528985e-06.

Finetuning 10 hidden layers
Epoch 1/10

Epoch 00001: val_loss improved from inf to 0.21712, saving model to checkpoints/DenseNet/weights_unfreezed_10.01-0.22.hdf5
Epoch 2/10

Epoch 00002: val_loss improved from 0.21712 to 0.21659, saving model to checkpoints/DenseNet/weights_unfreezed_10.02-0.22.hdf5
Epoch 3/10

Epoch 00003: val_loss improved from 0.21659 to 0.21599, saving model to checkpoints/DenseNet/weights_unfreezed_10.03-0.22.hdf5
Epoch 4/10

Epoch 00004: val_loss improved from 0.21599 to 0.21565, saving model to checkpoints/DenseNet/weights_unfreezed_10.04-0.22.hdf5
Epoch 5/10

Epoch 00005: val_loss did not improve from 0.21565
Epoch 6/10

Epoch 00006: val_loss did not improve from 0.21565

Epoch 00006: ReduceLROnPlateau reducing learning rate to 2.9999999242136255e-05.
Epoch 7/10

Epoch 00007: val_loss did not improve from 0.21565
Epoch 8/10

Epoch 00008: val_loss did not improve from 0.21565

Ep


Epoch 00010: val_loss did not improve from 0.20149


## Resnet

In [18]:
from keras.applications.resnet50 import preprocess_input, ResNet50
import gc
def load_top_model_resnet():
    model = Sequential()
    model.add(Dense(1024, activation='relu', input_shape=(2048,)))
    model.add(BatchNormalization())
    model.add(Dropout(0.25))
    model.add(Dense(128, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.25))
    model.add(Dense(30, activation='softmax'))
    return model

In [19]:
batch_size = 16
resol = 224

# this is the augmentation configuration we will use for training
train_datagen = ImageDataGenerator(
        rotation_range=15,
        width_shift_range=0.05,
        height_shift_range=0.05,
        shear_range=0.5,
        zoom_range=0.1,
        horizontal_flip=True,
        preprocessing_function=preprocess_input,
        )
# this is the augmentation configuration we will use for testing:
# only rescaling
test_datagen = ImageDataGenerator(horizontal_flip=True,
                                  preprocessing_function=preprocess_input)

train_generator = train_datagen.flow_from_directory(
        'train_data/',  # this is the target directory
        target_size=(resol, resol),
        batch_size=batch_size,
        class_mode='categorical') 


# # this is a similar generator, for validation data
validation_generator = test_datagen.flow_from_directory(
        'test_data/',
        target_size=(resol, resol),
        batch_size=batch_size,
        class_mode='categorical')

Found 10412 images belonging to 30 classes.
Found 2588 images belonging to 30 classes.


In [20]:
base_model = ResNet50(include_top=False, pooling='avg')
top = load_top_model_resnet()

x = top(base_model.outputs)
model = Model(inputs=base_model.inputs, outputs=x)
del top 
gc.collect()

118037

In [None]:
for layer in base_model.layers:
    layer.trainable = False

lr_list = [0.0003, 0.0001, 0.0001, 0.0001, 0.0001]
epoch_list = [20, 10, 10, 10, 10]
for rnd in range(5):
    n_layers = rnd * 4
    if n_layers > 0:
        for layer in base_model.layers[-n_layers:]:
            layer.trainable = True
    n_trainable = sum(m.trainable for m in base_model.layers)
    print(f"\nFinetuning {n_trainable} hidden layers")
    
    opt = SGD(lr=lr_list[rnd], decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(optimizer=opt,
                  loss='categorical_crossentropy',
                  metrics=[categorical_accuracy])
    
    path = f"checkpoints/ResNet/weights_unfreezed_{n_trainable}."
    model_checkpointer = ModelCheckpoint(filepath=path+"{epoch:02d}-{val_loss:.2f}.hdf5", 
                                        verbose=1, save_best_only=True, save_weights_only=True)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.3,
                                  patience=2, min_lr=0.0000001, verbose=1)
    early_stops = EarlyStopping(monitor='val_loss',
                    patience=6, verbose=1,)
    
    my_callbacks = []
    my_callbacks.append(model_checkpointer)
    my_callbacks.append(reduce_lr)
    my_callbacks.append(early_stops)
    
    history = model.fit_generator(
            train_generator,
            steps_per_epoch=10412 // batch_size, # 10412
            epochs=epoch_list[rnd],
            verbose=1,
            callbacks=my_callbacks,
            validation_data=validation_generator,
            validation_steps=2588 // batch_size, # 2588
            class_weight=CLASS_WEIGHTS,
            )
    pd.DataFrame(history.history).to_csv(f"RESULTS/ResNet/history_unfreezed_{n_trainable}.csv")
    model.save(f'MODEL/ResNet/densenet_unfreezed_{n_trainable}.h5') 



Finetuning 0 hidden layers
Epoch 1/20

Epoch 00001: val_loss improved from inf to 0.39958, saving model to checkpoints/ResNet/weights_unfreezed_0.01-0.40.hdf5
Epoch 2/20

Epoch 00002: val_loss improved from 0.39958 to 0.38230, saving model to checkpoints/ResNet/weights_unfreezed_0.02-0.38.hdf5
Epoch 3/20

Epoch 00003: val_loss improved from 0.38230 to 0.37682, saving model to checkpoints/ResNet/weights_unfreezed_0.03-0.38.hdf5
Epoch 4/20

Epoch 00004: val_loss did not improve from 0.37682
Epoch 5/20

Epoch 00005: val_loss did not improve from 0.37682

Epoch 00005: ReduceLROnPlateau reducing learning rate to 9.000000427477062e-05.
Epoch 6/20

Epoch 00006: val_loss did not improve from 0.37682
Epoch 7/20

Epoch 00007: val_loss improved from 0.37682 to 0.37446, saving model to checkpoints/ResNet/weights_unfreezed_0.07-0.37.hdf5
Epoch 8/20

Epoch 00008: val_loss did not improve from 0.37446
Epoch 9/20

Epoch 00009: val_loss did not improve from 0.37446

Epoch 00009: ReduceLROnPlateau redu

## Prediction

In [None]:
from natsort import natsorted

In [None]:
df = pd.read_csv("meta-data/sample_submission.csv")
cols = list(df.columns)
cols.remove("image_id")
pred_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
gen = pred_datagen.flow_from_directory(
        'predict_data/',  # this is the target directory
        target_size=(resol, resol),
        batch_size=30,
        class_mode=None,
        shuffle=False) 

preds = model.predict_generator(gen, steps=200, verbose=1)

In [None]:
df = pd.DataFrame(preds, columns=cols)
df["image_id"] = gen.filenames
df = df[["image_id"] + cols]
def f(x):
    if x.startswith("predict/"):
        return x[8:]
df["image_id"] = df["image_id"].apply(f)
fnames = os.listdir("predict_data/predict/")
fnames = natsorted(fnames)
df = df.set_index("image_id")
df = df.reindex(fnames)
_ = df.hist(figsize=(20, 15), bins=10)

In [None]:
df.to_csv("SUB/Sub8.csv", index=True)
model.save("SUB/Sub8_weights-ResNet.h5")