Building a CNN using Keras

In [None]:
%matplotlib inline

import json
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os

seed = 0
np.random.seed(seed)

from keras.callbacks import ReduceLROnPlateau
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, roc_auc_score

# Local imports
import adjust_path  # Before doing any local imports
from icc.data_loader import DataLoader
from icc.contrib.preprocessing.utils import *
import icc.models.spencer.convnets_base as nets
from icc.models.convnets_playground import ConvnetsBox

In [None]:
# Load the training set through the project's DataLoader. `X` and `y` are
# notebook-level names reused by the preprocessing and final-training cells.
X, y = DataLoader.load_train()

## Research bench: only run if you are still experimenting with your networks.

In [None]:
def run_kfold_training(init_model, X, y, k: int=5, epochs: int=100, seed: int=0,
                       batch_size: int=24):
    """Train a freshly built model on each fold of a stratified k-fold split.

    Parameters
    ----------
    init_model : callable
        Zero-argument factory, called once per fold. The returned model must
        provide Keras-style ``fit`` and ``evaluate`` methods.
    X, y : array-like
        Samples and labels. Both are indexed with the integer index arrays
        produced by ``StratifiedKFold.split`` (``X[train]``, ``y[train]``),
        so they must support that kind of indexing.
    k : int
        Number of folds.
    epochs : int
        Number of epochs to train on every fold.
    seed : int
        ``random_state`` of the shuffled stratified split.
    batch_size : int
        Mini-batch size handed to ``fit``. Defaults to 24, the value that
        used to be hard-coded.

    Returns
    -------
    tuple
        ``(scores, model)``. ``scores`` is ``{'loss': [...], 'acc': [...]}``
        with one entry per fold, taken from positions 0 and 1 of
        ``model.evaluate`` on the validation part of the fold. ``model`` is
        the model trained on the LAST fold only; earlier models are discarded.
    """
    # NOTE(review): newer Keras releases call this argument `min_delta`
    # instead of `epsilon` -- confirm against the installed version.
    reduce_lr_loss = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=15, verbose=1,
                                       epsilon=1e-4, mode='min')
    skfold = StratifiedKFold(n_splits=k, shuffle=True, random_state=seed)

    scores = {'loss': [], 'acc': []}
    for fold_nr, (train, validate) in enumerate(skfold.split(X, y)):
        print('=> Fold: {}'.format(fold_nr))

        # Slice the validation part once; it is needed for both fit and evaluate.
        X_val, y_val = X[validate], y[validate]

        # A new model per fold, so no weights leak from one fold to the next.
        model = init_model()
        model.fit(X[train], y[train],
                  epochs=epochs,
                  verbose=2,
                  batch_size=batch_size,
                  validation_data=(X_val, y_val),
                  callbacks=[reduce_lr_loss])

        # Assumes the model was compiled with accuracy as its first metric,
        # so that score[1] is the validation accuracy -- TODO confirm.
        score = model.evaluate(X_val, y_val, verbose=1)
        print('\n')
        print('Val score:', score[0])
        print('Val accuracy:', score[1])
        print('\n')
        scores['loss'].append(score[0])
        scores['acc'].append(score[1])

    return (scores, model)


def model_performance(y_true, y_pred, scores):
    """Summarise cross-validation scores and hold-out predictions.

    Parameters
    ----------
    y_true, y_pred :
        True and predicted labels, passed as-is to ``f1_score`` and
        ``roc_auc_score``.
    scores : dict
        Must contain the keys ``'loss'`` and ``'acc'``, each holding the
        per-fold values to aggregate.

    Returns
    -------
    dict
        Keys ``mean_loss``, ``mean_acc``, ``stdev_loss``, ``stdev_acc``,
        ``f1score`` and ``rocaucscore``. Every value is also printed with
        three decimals.
    """
    fold_losses = np.array(scores['loss'])
    fold_accs = np.array(scores['acc'])

    metrics = {
        'mean_loss': fold_losses.mean(),
        'mean_acc': fold_accs.mean(),
        'stdev_loss': fold_losses.std(),
        'stdev_acc': fold_accs.std(),
        'f1score': f1_score(y_true, y_pred),
        'rocaucscore': roc_auc_score(y_true, y_pred),
    }

    # (format template, metrics key) pairs, in the order they are reported.
    report_lines = (
        ('Mean loss: {:.3f}', 'mean_loss'),
        ('Mean acc: {:.3f}', 'mean_acc'),
        ('Stdev loss: {:.3f}', 'stdev_loss'),
        ('Stdev acc: {:.3f}', 'stdev_acc'),
        ('f1 score: {:.3f}', 'f1score'),
        ('roc & auc score: {:.3f}', 'rocaucscore'),
    )
    for template, key in report_lines:
        print(template.format(metrics[key]))
    return metrics

In [None]:
# Filter the loaded training data and split it into train / test parts for the
# research bench below.
# NOTE(review): `Preprocess` presumably comes from the star import of
# icc.contrib.preprocessing.utils -- confirm.
prep = Preprocess()
# NOTE(review): presumably removes samples with unusable angle values --
# confirm in Preprocess.filter_angle.
X_filt, y_filt = prep.filter_angle(X, y)
# NOTE(review): `_basic_trainset` is underscore-prefixed (private by convention);
# the meaning of how='deep' is not visible here. test_size=0.1 presumably
# holds out 10% of the filtered data -- confirm.
Xtrain, Xtest, Ytrain, Ytest = prep._basic_trainset(X_filt, y_filt, how='deep', test_size=0.1)

In [None]:
# Only run for testing out experiments and not actually fitting a model you will save the weights to.
scores, model = run_kfold_training(nets.convnetBlue, Xtrain, Ytrain, k=2)

probs = model.predict_proba(Xtest)
Ypreds = [1 if p >= 0.5 else 0 for p in probs]

model_metrics = model_performance(Ytest, Ypreds, scores)

## Once you are satisfied with your findings, proceed to the actual training, whose model will be used for the submission file

In [None]:
# Final training run: fits on the full X, y as loaded (not the angle-filtered
# X_filt / y_filt used in the research bench).
# NOTE(review): presumably ConvnetsBox writes its weights/results under `wdir`
# -- confirm in icc.models.convnets_playground.
final_model = ConvnetsBox(nets.convnetBlue, epochs=100, wdir='./saved_model/results/test')
final_model.fit(X, y)

## Not sure what to do from here with the stacking classifier.

In [None]:
# Save graph in case you decided to clear all. This will allow you to rebuild the graph and reload your favorite weights.
model = nets.convnetBlue()
save_graph_layout(model, 'blue-model-blueprint.json')

In [None]:
# Loading the graph again
model = load_graph_layout('blue-model-blueprint.json')

# Loading favorite weights.
wdir = 'path/to/weights'
best = 'weights-VAcc0.9257-TrAcc0.9992-VLoss0.3768-Ep39.hdf5'
model.load_weights(filepath=os.path.join(wdir, best))

## Compare stacking submissions

In [None]:
# Directory holding the generated submission files. Note that this rebinds the
# `wdir` name used by the weight-loading cell.
wdir = './saved_model/results/submissions/'
# Read the two submissions that are going to be compared.
stacked_1 = pd.read_csv(os.path.join(wdir, 'subm-blue-bestLoss_0.249-bestAcc_0.912.csv'))
stacked_2 = pd.read_csv(os.path.join(wdir, 'subm-white-bestLoss_0.377-bestAcc_0.926.csv'))

In [None]:
# Put the predictions of both submissions side by side: all columns of the
# first file plus the `is_iceberg` column of the second.
concat_subm = pd.concat([stacked_1, stacked_2['is_iceberg']], axis=1)
# Number the prediction columns (every column after the first) from 0.
# Assumes the first column of stacked_1 is the sample id -- TODO confirm.
cols = ["is_iceberg_" + str(i) for i in range(len(concat_subm.columns) - 1)]
concat_subm.columns = ['id'] + cols
concat_subm.head()

In [None]:
# Check how strongly the predictions of the two submissions correlate.
# The frame is named `concat_subm`; the previous `concat_sub` was undefined.
# The `id` column is an identifier rather than a prediction, so it is left out.
# This also keeps corr() working on recent pandas releases, which no longer
# skip non-numeric columns silently.
concat_subm.drop('id', axis=1).corr()