In [15]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
np.random.seed(123)  # for reproducibility
from tqdm import tqdm

import keras
from keras.models import Sequential, load_model, model_from_json
from keras.applications.resnet50 import preprocess_input, ResNet50
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import MaxPooling2D, Conv2D, BatchNormalization
from keras.utils import np_utils, to_categorical
from keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array, array_to_img
from keras.callbacks import ModelCheckpoint, TensorBoard, ReduceLROnPlateau, EarlyStopping
from keras.optimizers import SGD
from keras.metrics import binary_accuracy, categorical_accuracy, binary_crossentropy, categorical_crossentropy
from sklearn.metrics import log_loss
from sklearn.model_selection import StratifiedKFold
from sklearn.utils.class_weight import compute_class_weight

In [16]:
CLASS_MAP = {'antelope': 0,
 'bat': 1,
 'beaver': 2,
 'bobcat': 3,
 'buffalo': 4,
 'chihuahua': 5,
 'chimpanzee': 6,
 'collie': 7,
 'dalmatian': 8,
 'german+shepherd': 9,
 'grizzly+bear': 10,
 'hippopotamus': 11,
 'horse': 12,
 'killer+whale': 13,
 'mole': 14,
 'moose': 15,
 'mouse': 16,
 'otter': 17,
 'ox': 18,
 'persian+cat': 19,
 'raccoon': 20,
 'rat': 21,
 'rhinoceros': 22,
 'seal': 23,
 'siamese+cat': 24,
 'spider+monkey': 25,
 'squirrel': 26,
 'walrus': 27,
 'weasel': 28,
 'wolf': 29}
# CLASS_WEIGHTS = {0: 0.0017985611510791368,
#  1: 0.004878048780487805,
#  2: 0.009345794392523364,
#  3: 0.0029850746268656717,
#  4: 0.002061855670103093,
#  5: 0.003236245954692557,
#  6: 0.002617801047120419,
#  7: 0.001838235294117647,
#  8: 0.003472222222222222,
#  9: 0.0018181818181818182,
#  10: 0.0021413276231263384,
#  11: 0.002688172043010753,
#  12: 0.0011248593925759281,
#  13: 0.0064516129032258064,
#  14: 0.020833333333333332,
#  15: 0.0026246719160104987,
#  16: 0.01,
#  17: 0.002403846153846154,
#  18: 0.0024937655860349127,
#  19: 0.002544529262086514,
#  20: 0.0036101083032490976,
#  21: 0.005681818181818182,
#  22: 0.002617801047120419,
#  23: 0.0018796992481203006,
#  24: 0.003663003663003663,
#  25: 0.006578947368421052,
#  26: 0.0015455950540958269,
#  27: 0.008403361344537815,
#  28: 0.006756756756756757,
#  29: 0.0030959752321981426}
CLASS_WEIGHTS = compute_class_weight('balanced', np.arange(0, 30), target.argmax(axis=1))
CLASS_WEIGHTS

array([0.6235012 , 1.69270833, 3.25814536, 1.03668262, 0.71507151,
       1.12262522, 0.90845563, 0.6372549 , 1.20705664, 0.63076177,
       0.74328188, 0.93390805, 0.390039  , 2.24525043, 7.22222222,
       0.91036415, 3.49462366, 0.83493899, 0.86493679, 0.88255261,
       1.25240848, 1.96969697, 0.90845563, 0.65162907, 1.27077224,
       2.29276896, 0.53630363, 2.92792793, 2.35507246, 1.07526882])

In [17]:
train = pd.read_csv("extracted/train_resnet_feat.csv", index_col="Image_id")
target = train["target"]
train = train.drop("target", axis=1).values
target = target.map(CLASS_MAP).values
train.shape, target.shape

((13000, 2048), (13000,))

In [18]:
def plot_history(history):
    loss_list = [s for s in history.history.keys() if 'acc' not in s and 'val' not in s]
    val_loss_list = [s for s in history.history.keys() if 'acc' not in s and 'val' in s]
    acc_list = [s for s in history.history.keys() if 'acc' in s and 'val' not in s]
    val_acc_list = [s for s in history.history.keys() if 'acc' in s and 'val' in s]
    
    if len(loss_list) == 0:
        print('Loss is missing in history')
        return 
    
    ## As loss always exists
    epochs = range(1,len(history.history[loss_list[0]]) + 1)
    
    ## Loss
    for ll, vl in zip(loss_list, val_loss_list):
        plt.figure()
        plt.plot(epochs, history.history[ll], 'b', label=f'{ll} ({history.history[ll][-1]:.5f})')
        plt.plot(epochs, history.history[vl], 'g', label=f'{vl} ({history.history[vl][-1]:.5f})')
    
        plt.title('Loss')
        plt.xlabel('Epochs')
        plt.ylabel('Loss')
        plt.legend()
    
    ## Accuracy
    for ll, vl in zip(acc_list, val_acc_list):
        plt.figure()
        plt.plot(epochs, history.history[ll], 'b', label=f'{ll} ({history.history[ll][-1]:.5f})')
        plt.plot(epochs, history.history[vl], 'g', label=f'{vl} ({history.history[vl][-1]:.5f})')
        plt.title('Accuracy')
        plt.xlabel('Epochs')
        plt.ylabel('Accuracy')
        plt.legend()
    plt.show()

In [19]:
target = to_categorical(target)
target.shape

(13000, 30)

In [36]:
def load_model():
    model = Sequential()
    model.add(Dense(1024, activation='relu', input_shape=(2048,)))
    model.add(BatchNormalization())
    model.add(Dropout(0.25))
    model.add(Dense(128, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.25))
    model.add(Dense(30, activation='softmax'))
#     opt = 'adam'
    opt = SGD(lr=0.0003, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(optimizer=opt,
                  loss='categorical_crossentropy',
                  metrics=[categorical_accuracy])
    return model
def fit_model(model, train, target, val_data, val_target, epochs, batch_size=32, verbose=2):
    
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.3,
                                  patience=2, min_lr=0.0000001, verbose=1)
    early_stops = EarlyStopping(monitor='val_loss',
                    patience=15, verbose=1,)
    
    history = model.fit(train, target,
          epochs=epochs,
          batch_size=batch_size,
          validation_data=(val_data, val_target),
          class_weight=CLASS_WEIGHTS, 
            callbacks=[reduce_lr, early_stops],
            verbose=verbose)
    return history

In [34]:
epochs = 30
batch_size = 32

In [35]:
k_folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

In [32]:
kfold_history = []
for it, (traindex, testdex) in enumerate(k_folds.split(train, target.argmax(axis=1))):
    print(f"FOLD {it+1}/{k_folds.get_n_splits()}...")
    model = load_model()
    history = fit_model(model, train[traindex], target[traindex], 
                        train[testdex], target[testdex], 
                        epochs=epochs, batch_size=batch_size, 
                        verbose=1)
    kfold_history.append(history)

FOLD 1/5...
Train on 10387 samples, validate on 2613 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
FOLD 2/5...
Train on 10396 samples, validate on 2604 samples
Epoch 1/30
Epoch 2/30

KeyboardInterrupt: 

In [None]:
val_losses = []
losses = []
for hist in kfold_history:
    val_losses.append(hist.history['val_loss'][-1])
    losses.append(hist.history['loss'][-1])
print("BatchNorm with SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True) [30 epochs]")
print(f"Average validation loss:   {np.mean(val_losses):.4f} + {np.std(val_losses):.4f}")
print(f"Average training loss: {np.mean(losses):.4f} + {np.mean(losses):.4f}")

In [24]:
val_losses = []
losses = []
for hist in kfold_history:
    val_losses.append(hist.history['val_loss'][-1])
    losses.append(hist.history['loss'][-1])
print("BatchNorm with SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True) [30 epochs]")
print(f"Average validation loss:   {np.mean(val_losses):.4f} + {np.std(val_losses):.4f}")
print(f"Average training loss: {np.mean(losses):.4f} + {np.mean(losses):.4f}")

BatchNorm with SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True) [30 epochs]
Average validation loss:   0.3080 + 0.0000
Average training loss: 0.0889 + 0.0889


In [39]:
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.3,
                                  patience=2, min_lr=0.0000001, verbose=1)
early_stops = EarlyStopping(monitor='val_loss',
                patience=15, verbose=1,)
epochs = 25
batch_size = 60
model = load_model()
history = model.fit(train, target,
          epochs=100,
          batch_size=batch_size,
          class_weight=CLASS_WEIGHTS, validation_split=0.2,
            callbacks=[reduce_lr, early_stops],
            verbose=1)

Train on 10400 samples, validate on 2600 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100

Epoch 00045: ReduceLROnPlateau reducing learning rate to 9.000000427477062e-05.
Epoch 46/100
Epoch 47/100


Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100

Epoch 00051: ReduceLROnPlateau reducing learning rate to 2.700000040931627e-05.
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100

Epoch 00055: ReduceLROnPlateau reducing learning rate to 8.100000013655517e-06.
Epoch 56/100
Epoch 57/100
Epoch 58/100

Epoch 00058: ReduceLROnPlateau reducing learning rate to 2.429999949526973e-06.
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100

Epoch 00062: ReduceLROnPlateau reducing learning rate to 7.289999985005124e-07.
Epoch 63/100
Epoch 64/100

Epoch 00064: ReduceLROnPlateau reducing learning rate to 2.1870000637136398e-07.
Epoch 65/100
Epoch 66/100

Epoch 00066: ReduceLROnPlateau reducing learning rate to 1e-07.
Epoch 67/100
Epoch 68/100

Epoch 00068: ReduceLROnPlateau reducing learning rate to 1e-07.
Epoch 69/100
Epoch 70/100

Epoch 00070: ReduceLROnPlateau reducing learning rate to 1e-07.
Epoch 71/100
Epoch 72/100

Epoch 00072: ReduceLROnPlateau reducing learning rate to 1e-07.
Epoch 73/

In [174]:
top_model_weights_path = 'MODEL/top_model_initial.h5'
model.save_weights(top_model_weights_path)
with open("MODEL/top_model_initial.json", "w") as fp:
    fp.write(model.to_json())

In [40]:
model.save("MODEL/ResNet/top_model.h5")