Building CNN using Keras

In [None]:
%matplotlib inline

import json
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os

seed = 1234
np.random.seed(seed)


from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split

from keras.callbacks import ModelCheckpoint, TensorBoard, LearningRateScheduler, ReduceLROnPlateau
from keras.layers import Conv2D # spatial convolution over images
from keras.layers import MaxPooling2D # operation for spatial data
from keras.layers import Dense # densely-connected NN layer
from keras.layers import Dropout
from keras.layers import Activation
from keras.layers import Flatten
from keras.layers.normalization import BatchNormalization
from keras.models import Sequential
from keras.optimizers import Nadam
from keras.utils import to_categorical

# Local imports
import adjust_path  # Before doing any local imports
from icc.data_loader import DataLoader
from icc.contrib.preprocessing.utils import *

In [None]:
# Choose the training source: the augmented JSON dump or the loader's training set.
train_augmented = False
if train_augmented:
    # Load augmented data
    with open('../data/train_augmented.json', 'r') as f:
        data = json.load(f)

    data = pd.DataFrame(data)
    # Keep `inc_angle` whenever the augmented file provides it: the angle filter
    # below reads that column, and selecting only the two bands made it raise.
    feature_cols = [c for c in ('band_1', 'band_2', 'inc_angle') if c in data.columns]
    X = data[feature_cols].copy()
    y = data['is_iceberg']
else:
    X, y = DataLoader.load_train()

print("{} samples in X and y".format(len(X)))

if 'inc_angle' in X.columns:
    # Angles stored as the string 'na' are mapped to 0 so the `> 0` filter drops them.
    X['inc_angle'] = X['inc_angle'].replace('na', 0)
    idx_tr = np.where(X['inc_angle'] > 0)  # about 1471 images greater than angle 0
else:
    # No angle column available: keep every row. The tuple mirrors np.where's
    # return value so that `idx_tr[0]` keeps working downstream.
    idx_tr = (np.arange(len(X)),)

# Preprocessing step
prep = Preprocess()
# NOTE(review): test_size=0.0 presumably leaves x_valid / y_valid empty, although
# later cells validate and evaluate on them -- confirm against _basic_trainset.
# NOTE(review): idx_tr holds row positions of X and is later applied to x_train;
# this assumes _basic_trainset preserves the row order of X -- confirm.
x_train, x_valid, y_train, y_valid = prep._basic_trainset(X, y, how='deep', test_size=0.0)

# One-hot encoding of the labels is left disabled.
#y_onehot_train= to_categorical(y_train, num_classes=2)
#y_onehot_valid= to_categorical(y_valid, num_classes=2)

In [None]:
# Restrict the training arrays to the rows selected by the incidence-angle filter.
# Caution: this rebinds x_train / y_train from themselves, so running the cell a
# second time filters the already filtered arrays again.
rows_to_keep = idx_tr[0]
x_train = x_train[rows_to_keep, ...]
y_train = y_train[rows_to_keep]

In [None]:
def build_model(input_shape=(75, 75, 3), learning_rate=0.0001):
    """Build and compile the CNN used for binary classification.

    The network consists of four Conv2D -> ReLU -> MaxPooling2D -> Dropout
    stages, a Flatten layer, two Dense -> ReLU -> Dropout stages and a final
    single-unit Dense layer with a sigmoid activation. It is compiled with the
    Nadam optimizer, binary cross-entropy loss and the accuracy metric.

    Args:
        input_shape: Shape of one input sample, passed to the first Conv2D
            layer. Defaults to (75, 75, 3).
        learning_rate: Learning rate handed to Nadam. Defaults to 0.0001.

    Returns:
        The compiled Sequential model.
    """
    # (filters, pool_size, dropout_rate) of each convolutional stage, in order.
    conv_stages = [
        (64, (3, 3), 0.2),
        (128, (2, 2), 0.4),
        (128, (2, 2), 0.4),
        (256, (2, 2), 0.2),
    ]
    # (units, dropout_rate) of each hidden fully-connected stage, in order.
    dense_stages = [
        (1024, 0.4),
        (256, 0.2),
    ]

    model = Sequential()

    for stage, (filters, pool_size, dropout_rate) in enumerate(conv_stages):
        # Only the first layer of the model is given the input shape.
        first_layer_kwargs = {'input_shape': input_shape} if stage == 0 else {}
        model.add(Conv2D(filters=filters,
                         kernel_size=(3, 3),
                         strides=(1, 1),
                         padding="valid",
                         **first_layer_kwargs))
        model.add(Activation('relu'))
        model.add(MaxPooling2D(pool_size=pool_size, strides=(2, 2)))
        model.add(Dropout(dropout_rate))

    # Flatten the data for the fully-connected layers. Does not affect the batch size.
    model.add(Flatten())

    for units, dropout_rate in dense_stages:
        model.add(Dense(units=units))
        model.add(Activation('relu'))
        model.add(Dropout(dropout_rate))

    # Single sigmoid unit: the output is one score in [0, 1] per sample.
    model.add(Dense(1))
    model.add(Activation('sigmoid'))

    # Before training we need to configure the learning process.
    optimizer = Nadam(lr=learning_rate, epsilon=1e-8)
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
    return model


model = build_model()

# Prints a summary representation of your model.
#model.summary()

In [None]:
# Stratified 5-fold cross-validation: a fresh model is built and trained for
# every fold, then scored on that fold's held-out part.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)

scores = {'loss': [], 'acc': []}
for fold_n, (train_idx, val_idx) in enumerate(kfold.split(x_train, y_train)):
    print("FOLD number: ", fold_n)
    model = build_model()

    # Held-out part of this fold; used for validation during fit and for scoring.
    x_fold_val = x_train[val_idx]
    y_fold_val = y_train[val_idx]

    # Scale the learning rate by 0.1 when val_loss has not improved for 15 epochs.
    reduce_lr_loss = ReduceLROnPlateau(monitor='val_loss',
                                       factor=0.1,
                                       patience=15,
                                       verbose=1,
                                       epsilon=1e-4,
                                       mode='min')

    model.fit(x_train[train_idx], y_train[train_idx],
              epochs=100,
              verbose=2,
              batch_size=24,
              validation_data=(x_fold_val, y_fold_val),
              callbacks=[reduce_lr_loss])

    # evaluate returns [loss, accuracy] for the compiled loss and metric.
    score = model.evaluate(x_fold_val, y_fold_val, verbose=1)
    fold_loss, fold_acc = score[0], score[1]
    print('\n Val score:', fold_loss)
    print('\n Val accuracy:', fold_acc)
    scores['loss'].append(fold_loss)
    scores['acc'].append(fold_acc)

print('Mean loss:', np.mean(scores['loss']))
print('Mean acc:', np.mean(scores['acc']))

In [None]:
# Report (cross-validation output recorded from a run of the cell above):
# Mean loss: 0.23750277354
# Mean acc: 0.907542492985

In [None]:
# Final training run with checkpointing of the best weights (by validation accuracy).
filepath = './saved_model/weights-Epoch{epoch:02d}-ValLoss{val_loss:.4f}.hdf5'

# Make sure the checkpoint folder exists so the first save cannot fail on a
# missing directory.
os.makedirs(os.path.dirname(filepath), exist_ok=True)

checkpoint = ModelCheckpoint(filepath, monitor='val_acc', save_best_only=True, mode='max')
#earlystop = EarlyStopping(monitor='val_loss', patience=20)
callback = [checkpoint]#, earlystop]

# Start from freshly initialised weights: after the cross-validation loop the
# global `model` is the network that was already trained on the last fold.
model = build_model()

# NOTE(review): x_valid / y_valid come from a split requested with test_size=0.0;
# confirm they are non-empty, otherwise val_acc / val_loss are not available to
# the checkpoint callback.
model_history = model.fit(x_train, y_train,
                          epochs=20,
                          verbose=2,
                          batch_size=32,
                          validation_data=(x_valid, y_valid),
                          callbacks=callback)

In [None]:
# Put the per-epoch metrics into a separate frame instead of overwriting
# `model_history`, so this cell can be re-run without failing on `.history`.
history_df = pd.DataFrame(model_history.history)

# Loss and accuracy get their own labelled axes.
fig, (ax_loss, ax_acc) = plt.subplots(1, 2, figsize=(12, 4))
history_df.plot(y=['val_loss', 'loss'], ax=ax_loss)
ax_loss.set(title='Loss per epoch', xlabel='epoch', ylabel='loss')
history_df.plot(y=['val_acc', 'acc'], ax=ax_acc)
ax_acc.set(title='Accuracy per epoch', xlabel='epoch', ylabel='accuracy');

Notes:

The model is heavily overfitting.
- Reduced the total number of parameters; parameter counts of the variants tried:

    - 560,450 (filters 64)
    - 3,566,850 (no 4th conv)
    - 1,175,042 (filters 256)

In [None]:
# Score the model on the validation split. The network ends in a single sigmoid
# unit, so it is evaluated against the plain labels; the one-hot variant
# (`y_onehot_valid`) is never defined in this notebook.
score = model.evaluate(x_valid, y_valid)

In [None]:
# Predicted scores for the validation samples. `predict` returns the same
# sigmoid outputs as the `predict_proba` alias, which newer Keras releases
# no longer provide.
preds = model.predict(x_valid)

# Flag a sample as 1 when its score exceeds 0.55, otherwise 0.
is_iceberg = (np.ravel(preds) > 0.55).astype(int)

# Compare flat arrays so the check stays element-wise even if the labels are
# stored as a column vector.
correct_predictions = is_iceberg == np.ravel(y_valid)

# Positions of the misclassified validation samples.
wrong = np.where(~correct_predictions)

In [None]:
# Inspect one validation sample: show its image next to its label and its
# predicted score.
im = 54
sample_image = np.squeeze(x_valid[im])
plt.imshow(sample_image)
print('label', y_valid[im])
print('score', preds[im])

In [None]:
# Restore the weights of a chosen checkpoint file into the current model.
MODEL_DIR = 'saved_model'
best = 'weights-Epoch12-ValLoss0.2140.hdf5'
best_weights_path = os.path.join(MODEL_DIR, best)
model.load_weights(filepath=best_weights_path)

In [None]:
# Folder the submission file is written to further down.
DATA_DIR = '../data/'

print('Predicting and writing submission for test data...')

# From here on `X` refers to the test set, no longer to the training data.
X = DataLoader.load_test()

In [None]:
# Stack the band_1 entries into an array of 75x75 images with a trailing axis of
# length 1, then apply the scaler held by `prep`.
# NOTE(review): build_model declares input_shape=(75, 75, 3) while this array has
# a trailing axis of length 1 before scaling -- confirm what prep.scaler.transform
# returns.
band1_images = np.asarray(X["band_1"].tolist()).reshape(-1, 75, 75, 1)
band1 = prep.scaler.transform(band1_images)

In [None]:
# Stack the band_2 entries into an array of 75x75 images with a trailing axis of
# length 1, then apply the scaler held by `prep`.
# NOTE(review): build_model declares input_shape=(75, 75, 3) while this array has
# a trailing axis of length 1 before scaling -- confirm what prep.scaler.transform
# returns.
band2_images = np.asarray(X["band_2"].tolist()).reshape(-1, 75, 75, 1)
band2 = prep.scaler.transform(band2_images)

In [None]:
# Scores for the band_1 inputs. `predict` returns the same sigmoid outputs as the
# `predict_proba` alias, which newer Keras releases no longer provide.
band1_preds = model.predict(band1)

In [None]:
# Scores for the band_2 inputs. `predict` returns the same sigmoid outputs as the
# `predict_proba` alias, which newer Keras releases no longer provide.
band2_preds = model.predict(band2)

In [None]:
# One column of scores per band. np.ravel always yields a 1-D array, whereas
# np.squeeze would collapse a single-row prediction to a scalar and make the
# DataFrame constructor fail.
pred_concat = pd.DataFrame({'b1': np.ravel(band1_preds), 'b2': np.ravel(band2_preds)})

In [None]:
# Row-wise average of the two band scores. The columns are named explicitly so
# that re-running the cell does not fold the stored 'mean' column back into the
# average.
pred_concat['mean'] = pred_concat[['b1', 'b2']].mean(axis=1)

In [None]:
# Pair ids and averaged scores by position. Passing plain arrays avoids pandas
# aligning the two Series on their indexes, which would misplace values (or
# introduce NaNs) if the test frame does not carry a default 0..n-1 index.
results = pd.DataFrame({'id': np.asarray(X['id']),
                        'is_iceberg': np.asarray(pred_concat['mean'])})

In [None]:
# Preview the first five rows of the submission table.
results.head(n=5)

In [None]:
# Write the submission table as CSV (without the index column) into DATA_DIR.
submission_path = os.path.join(DATA_DIR, 'submission.csv')
results.to_csv(submission_path, index=False)
print('done.')