Ideas:

* Add FFT channels to CNN (Tried, does not make much difference)
* Fine-tune the CNN (SGD with a low learning rate)
* 5-fold CNN
* Extract Features from CNN (before FC) and do XGB
* TTA (tried, made it better)
* More augmentation: additional rotations of 45, 135, and 315 degrees
* More augmenting, random rotations and flips
* Predict the test data, then train with the test data as well (using the predicted labels)
* Train on all of the training data (no train-val split)
* Try a different way of combining the predictions
* Fine-tune on pre-trained models (Get rid of some top layers because input size is small)

This notebook trains without data augmentation; the augmentation functions are used only at prediction time (TTA).

In [1]:
import numpy as np
import pandas as pd
from skimage import transform
from keras.models import Sequential, load_model
from keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Flatten, LeakyReLU
from keras import losses, optimizers, callbacks
from keras import regularizers
import xgboost as xgb
from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import cross_validate, train_test_split, StratifiedKFold, KFold
from scipy import fftpack
import matplotlib.pyplot as plt
%matplotlib inline
# Single seed shared by the fold splitter (via random_state) and by NumPy's global RNG.
# NOTE(review): only NumPy is seeded here; the Keras/TensorFlow backend presumably
# keeps its own RNG state — confirm if exactly repeatable training is required.
RANDOM_SEED = 43
np.random.seed(RANDOM_SEED)

Using TensorFlow backend.


# Create Dataset 

In [2]:
# Train
# Build X with one (75, 75, 2) image per row of train.json and y with the
# matching `is_iceberg` labels. Each band is standardised per image.
df = pd.read_json('./data/train.json')
X, y = [], []
for im_band1, im_band2, label in zip(df['band_1'], df['band_2'], df['is_iceberg']):
    # Explicit float dtype: the in-place arithmetic below would raise a casting
    # error if a band were ever parsed as an integer array.
    im_band1 = np.array(im_band1, dtype=np.float64).reshape(75, 75, 1)
    im_band2 = np.array(im_band2, dtype=np.float64).reshape(75, 75, 1)
    # Preprocess
    # - Zero mean
    im_band1 -= np.mean(im_band1)
    im_band2 -= np.mean(im_band2)
    # - Normalize
    im_band1 /= np.std(im_band1)
    im_band2 /= np.std(im_band2)
    # Stack the two bands as channels
    im = np.concatenate([im_band1, im_band2], axis=2)
    X.append(im)
    y.append(label)
X = np.array(X)
y = np.array(y)
# Single-argument print(...) produces the same text on Python 2 and Python 3.
print('X.shape: %s' % (X.shape,))
print('y.shape: %s' % (y.shape,))

X.shape: (1604, 75, 75, 2)
y.shape: (1604,)


## Train - Val Split

In [3]:
# Cross-validation setup: shuffled, stratified folds with a fixed seed.
# MODEL_NUMBER picks which fold this run trains on.
N_SPLITS = 5
MODEL_NUMBER = 1
skf = StratifiedKFold(
    shuffle=True,
    random_state=RANDOM_SEED,
    n_splits=N_SPLITS,
)
# Materialise the (train_indices, val_indices) pairs so they can be indexed.
cv = list(skf.split(X, y))

In [4]:
# Select the (train, validation) index pair of the fold this run trains on.
# MODEL_NUMBER is 1-based; without this check a value of 0 would silently
# index cv[-1] and train on the last fold instead of failing.
assert 1 <= MODEL_NUMBER <= len(cv), 'MODEL_NUMBER must be between 1 and %d' % len(cv)
train_i, val_i = cv[MODEL_NUMBER - 1]
X_train, y_train = X[train_i], y[train_i]
X_val, y_val = X[val_i], y[val_i]
# Single-argument print(...) produces the same text on Python 2 and Python 3.
print('X_train.shape: %s' % (X_train.shape,))
print('y_train.shape: %s' % (y_train.shape,))
print('X_val.shape: %s' % (X_val.shape,))
print('y_val.shape: %s' % (y_val.shape,))
# Class balance of each split (mean of the 0/1 labels).
print('np.mean(y_train): %s' % np.mean(y_train))
print('np.mean(y_val): %s' % np.mean(y_val))

X_train.shape: (1282, 75, 75, 2)
y_train.shape: (1282,)
X_val.shape: (322, 75, 75, 2)
y_val.shape: (322,)
np.mean(y_train): 0.469578783151
np.mean(y_val): 0.468944099379


In [5]:
# X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=RANDOM_SEED)
# print 'X_train.shape:', X_train.shape
# print 'X_val.shape:', X_val.shape
# print 'y_train.shape:', y_train.shape
# print 'y_val.shape:', y_val.shape

## Data Augmentation

In [6]:
def bypass(x):
    """Identity transform: return the batch `x` unchanged."""
    return x

def h_flip(x):
    """Reverse axis 2 of a 4-D batch.

    Presumably a horizontal mirror for (N, H, W, C) batches.
    """
    return np.flip(x, axis=2)

def v_flip(x):
    """Reverse axis 1 of a 4-D batch.

    Presumably a vertical mirror for (N, H, W, C) batches.
    """
    return np.flip(x, axis=1)

def hv_flip(x):
    """Apply `v_flip` and then `h_flip` to the batch."""
    flipped_vertically = v_flip(x)
    return h_flip(flipped_vertically)

def _rotate_batch(x, angle, **rotate_kwargs):
    """Rotate every item of a batch with `transform.rotate`.

    Each item of `x` is rotated on its own and the results are joined along a
    new leading axis, so the output has one entry per input item.

    Args:
        x: iterable of images (iterated along its first axis).
        angle: rotation angle passed to `transform.rotate`.
        **rotate_kwargs: forwarded unchanged to `transform.rotate`
            (for example `mode='reflect'`).

    Returns:
        Array of the rotated images, stacked along axis 0.
    """
    return np.concatenate(
        [np.expand_dims(transform.rotate(im, angle, **rotate_kwargs), axis=0) for im in x],
        axis=0)

def rot90(x):
    """Rotate each image in the batch by 90 degrees."""
    return _rotate_batch(x, 90)

def rot180(x):
    """Rotate each image in the batch by 180 degrees."""
    return _rotate_batch(x, 180)

def rot270(x):
    """Rotate each image in the batch by 270 degrees."""
    return _rotate_batch(x, 270)

def rot45(x):
    """Rotate each image by 45 degrees, passing `mode='reflect'` to `transform.rotate`."""
    return _rotate_batch(x, 45, mode='reflect')

def rot135(x):
    """Rotate each image by 135 degrees, passing `mode='reflect'` to `transform.rotate`."""
    return _rotate_batch(x, 135, mode='reflect')

def rot315(x):
    """Rotate each image by 315 degrees, passing `mode='reflect'` to `transform.rotate`."""
    return _rotate_batch(x, 315, mode='reflect')

# Transforms whose predictions are averaged at test time (TTA).
# The 45/135/315-degree rotations are not part of this list.
aug_funcs = [
    bypass,
    h_flip,
    v_flip,
    hv_flip,
    rot90,
    rot180,
    rot270,
]

In [7]:
# Training-time augmentation is disabled in this notebook: the block below is
# kept commented out, so the shapes printed afterwards are the un-augmented ones.
# # Train
# X_train = np.concatenate([func(X_train) for func in aug_funcs], axis=0)
# y_train = np.concatenate([y_train] * len(aug_funcs))

# # Validation
# X_val = np.concatenate([func(X_val) for func in aug_funcs], axis=0)
# y_val = np.concatenate([y_val] * len(aug_funcs))

# # 
# Single-argument print(...) produces the same text on Python 2 and Python 3.
print('X_train.shape: %s' % (X_train.shape,))
print('y_train.shape: %s' % (y_train.shape,))
print('X_val.shape: %s' % (X_val.shape,))
print('y_val.shape: %s' % (y_val.shape,))

X_train.shape: (1282, 75, 75, 2)
y_train.shape: (1282,)
X_val.shape: (322, 75, 75, 2)
y_val.shape: (322,)


# Training

In [8]:
def get_model(input_shape):
    """Build and compile the CNN used as the binary classifier.

    Four 3x3 convolutional blocks (32, 64, 128 and 256 filters), each ending
    in dropout, feed a 32-unit dense layer and a single sigmoid output unit.
    The model is compiled with binary cross-entropy, an SGD optimizer created
    with its default arguments, and an accuracy metric.

    Args:
        input_shape: shape of one input sample, handed to the first Conv2D.

    Returns:
        The compiled `Sequential` model.
    """
    # Layers are listed in the order they are stacked.
    layers = [
        # Block 1 (no pooling)
        Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
        Dropout(0.25),
        # Block 2
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
        # Block 3
        Conv2D(128, (3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.33),
        # Block 4 (LeakyReLU applied as a separate layer)
        Conv2D(256, (3, 3)),
        LeakyReLU(),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.5),
        # Fully connected head
        Flatten(),
        Dense(32),
        LeakyReLU(),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ]

    model = Sequential()
    for layer in layers:
        model.add(layer)

    # Compile the model
    model.compile(loss=losses.binary_crossentropy,
                  optimizer=optimizers.SGD(),
                  metrics=['accuracy'])
    return model

In [9]:
# Fresh, untrained network for 75x75 inputs with 2 channels.
model = get_model(input_shape=(75, 75, 2))

In [10]:
# Callbacks
def get_lr(epoch):
    """Learning-rate schedule that draws a new random rate on every call.

    The rate is `np.random.rand() * 4e-2 + 1e-7`, capped at 0.025, so the cap
    itself is returned whenever the draw exceeds it. The chosen value is
    printed before it is returned.

    Args:
        epoch: epoch index supplied by the caller; not used.

    Returns:
        The learning rate for this call, at most 0.025.
    """
    lr = (np.random.rand() * 4e-2 + 1e-7)
    lr = np.clip(lr, a_min=None, a_max=0.025)
    # Single-argument print(...) produces the same text on Python 2 and Python 3.
    print('lr: %s' % lr)
    return lr
# Every callback monitors the validation loss.
m_q = 'val_loss'
# The checkpoint file is specific to the fold selected by MODEL_NUMBER.
MODEL_PATH = './models/model10/model' + str(MODEL_NUMBER) + '.h5'
model_path = MODEL_PATH

check_pt = callbacks.ModelCheckpoint(
    filepath=model_path,
    monitor=m_q,
    save_best_only=True,
    verbose=1,
)
early_stop = callbacks.EarlyStopping(monitor=m_q, patience=20, verbose=1)
# Created but not added to callback_list below, so it does not take part in this run.
reduce_lr = callbacks.ReduceLROnPlateau(monitor=m_q, factor=0.33, patience=2, verbose=1)
schedule_lr = callbacks.LearningRateScheduler(get_lr)
callback_list = [check_pt, early_stop, schedule_lr]

model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=200,
    callbacks=callback_list,
)

Train on 1282 samples, validate on 322 samples
lr: 0.0244853858868
Epoch 1/200
lr: 0.025
Epoch 2/200
lr: 0.025
Epoch 3/200
lr: 0.025
Epoch 4/200
lr: 0.00443804085621
Epoch 5/200
lr: 0.0194013753029
Epoch 6/200
lr: 0.025
Epoch 7/200
lr: 0.00177595756003
Epoch 8/200
lr: 0.0232440411498
Epoch 9/200
lr: 0.0139823670924
Epoch 10/200
lr: 0.00335207606037
Epoch 11/200
lr: 0.025
Epoch 12/200
lr: 0.00867554203113
Epoch 13/200
lr: 0.0231889798457
Epoch 14/200
lr: 0.0147672552212
Epoch 15/200
lr: 0.025
Epoch 16/200
lr: 0.0056231330413
Epoch 17/200
lr: 0.0092648607171
Epoch 18/200
lr: 0.025
Epoch 19/200
lr: 0.00877584074921
Epoch 20/200
lr: 0.025
Epoch 21/200
lr: 0.0137969162982
Epoch 22/200
lr: 0.00482041362053
Epoch 23/200
lr: 0.025
Epoch 24/200
lr: 0.025
Epoch 25/200
lr: 0.000807701384504
Epoch 26/200
lr: 0.0178745658824
Epoch 27/200
lr: 0.025
Epoch 28/200


lr: 0.00376659631732
Epoch 29/200
lr: 0.0239774846194
Epoch 30/200
lr: 0.025
Epoch 31/200
lr: 0.0246769788633
Epoch 32/200
lr: 0.00205869931629
Epoch 33/200
lr: 0.025
Epoch 34/200
lr: 0.0236331372681
Epoch 35/200
lr: 0.025
Epoch 36/200
lr: 0.00389068332302
Epoch 37/200
lr: 0.0245359707527
Epoch 38/200
lr: 0.0196693217334
Epoch 39/200
lr: 0.025
Epoch 40/200
lr: 0.025
Epoch 41/200
lr: 0.0244400091734
Epoch 42/200
lr: 0.025
Epoch 43/200
lr: 0.00172642369073
Epoch 44/200
lr: 0.025
Epoch 45/200
lr: 0.0196848575368
Epoch 46/200
lr: 0.00390501862403
Epoch 47/200
lr: 0.0146598695364
Epoch 48/200
lr: 0.025
Epoch 49/200
lr: 0.0152021242021
Epoch 50/200
lr: 0.0142914303055
Epoch 51/200
lr: 0.00462439819496
Epoch 52/200
lr: 0.0212222114826
Epoch 53/200
lr: 0.0230635431741
Epoch 54/200
lr: 0.025
Epoch 55/200
lr: 0.00448461454212
Epoch 56/200
lr: 0.025
Epoch 57/200
lr: 0.0147107508274
Epoch 58/200


lr: 0.0245896308004
Epoch 59/200
lr: 0.0176581293416
Epoch 60/200
lr: 0.0209315534808
Epoch 61/200
lr: 0.025
Epoch 62/200
lr: 0.025
Epoch 63/200
lr: 0.0123166251702
Epoch 64/200
lr: 0.0227419145103
Epoch 65/200
lr: 0.025
Epoch 66/200
lr: 0.00728312986265
Epoch 67/200
lr: 0.0236316573285
Epoch 68/200
lr: 0.00022038462483
Epoch 69/200
lr: 0.0162004634331
Epoch 70/200
lr: 0.025
Epoch 71/200
lr: 0.022094467159
Epoch 72/200
lr: 0.00713311403587
Epoch 73/200
lr: 0.00769115167793
Epoch 74/200
lr: 0.0114152787045
Epoch 75/200
Epoch 00075: early stopping


<keras.callbacks.History at 0x7fce79adaa10>

In [11]:
# # Finetune
# loss = losses.binary_crossentropy
# optimizer = optimizers.SGD(lr=1e-4)
# metrics = ['accuracy']
# model.compile(loss=loss, optimizer=optimizer, metrics=metrics)

In [None]:
# m_q = 'val_loss'
# model_path = MODEL_PATH
# check_pt = callbacks.ModelCheckpoint(filepath=model_path, monitor=m_q, save_best_only=True, verbose=1)
# early_stop = callbacks.EarlyStopping(patience=5, monitor=m_q, verbose=1)
# reduce_lr = callbacks.ReduceLROnPlateau(patience=2, factor=0.33, monitor=m_q, verbose=1)
# callback_list = [check_pt, early_stop, reduce_lr]

# model.fit(X_train, y_train, validation_data=(X_val, y_val), callbacks=callback_list, epochs=200)

# Predict Test

In [3]:
# Load test data
# Build X_test with one (75, 75, 2) image per row of test.json, standardising
# each band per image. Note that `df` now refers to the test frame.
df = pd.read_json('./data/test.json')
# y_test stays empty: the loop below reads no labels.
X_test, y_test = [], []
for im_band1, im_band2 in zip(df['band_1'], df['band_2']):
    # Explicit float dtype: the in-place arithmetic below would raise a casting
    # error if a band were ever parsed as an integer array.
    im_band1 = np.array(im_band1, dtype=np.float64).reshape(75, 75, 1)
    im_band2 = np.array(im_band2, dtype=np.float64).reshape(75, 75, 1)
    # Preprocess - zero mean
    im_band1 -= np.mean(im_band1)
    im_band2 -= np.mean(im_band2)
    # Preprocess - normalize
    im_band1 /= np.std(im_band1)
    im_band2 /= np.std(im_band2)
    # Stack the two bands as channels
    im = np.concatenate([im_band1, im_band2], axis=2)
    X_test.append(im)
X_test = np.array(X_test)
# Single-argument print(...) produces the same text on Python 2 and Python 3.
print('X_test.shape: %s' % (X_test.shape,))

X_test.shape: (8424, 75, 75, 2)


In [12]:
# Predict without TTA
# NOTE(review): `model` is whatever is currently in memory. After the training
# cell that is presumably the last-epoch weights rather than the best checkpoint
# written to MODEL_PATH — confirm before relying on these predictions.
y_test_p = model.predict(X_test, verbose=1)
y_test_p



array([[  4.32659319e-04],
       [  8.25419486e-01],
       [  9.69080150e-01],
       ..., 
       [  9.57645752e-05],
       [  9.99154449e-01],
       [  9.99993563e-01]], dtype=float32)

In [5]:
# Weighted ensemble of the saved per-fold models with test-time augmentation:
# every model predicts on every transform in aug_funcs, and the weighted
# predictions are averaged.
# weights = [0.25, 0.4 / 3, 0.35, 0.4 / 3, 0.4 / 3]
weights = [0.2, 0.2, 0.2, 0.2, 0.2]

# Load one checkpoint per weight; the list of weights decides how many models
# take part, so there is no separately hard-coded model count to keep in sync.
models = []
for i in range(len(weights)):
    print(i)
    # Load the model
    MODEL_PATH = './models/model10/model' + str(i + 1) + '.h5'
    models.append(load_model(MODEL_PATH))

y_test_p = 0
# predict - tta
# The augmentation loop is the outer one so that each transformed copy of
# X_test is computed once and shared by all models, not rebuilt per model.
for func in aug_funcs:
    X_test_aug = func(X_test)
    for model, w in zip(models, weights):
        y_test_p += model.predict(X_test_aug, verbose=1).flatten() * w
    # Drop the reference so only one augmented copy is alive at a time.
    del X_test_aug
# y_test_p = y_test_p / (len(aug_funcs) * 5.0)
# Normalise by the number of transforms and the total weight.
y_test_p = y_test_p / (len(aug_funcs) * sum(weights))

0
1
2
3
4


In [6]:
# Submission file: the ids from `df` next to the predicted probabilities.
df_sub = df[['id']].copy()
df_sub['is_iceberg'] = y_test_p.flatten()
df_sub.to_csv('./submissions/sub21.csv', index=False)