Ideas:

* Add FFT channels to CNN (Tried, does not make much difference)
* Finetune CNN (with SGD slow learning rate)
* 5-fold CNN
* Extract Features from CNN (before FC) and do XGB
* TTA (tried, made it better)
* More augmenting, additional 45, 135, 315 degrees
* More augmenting, random rotations and flips
* Pseudo-labeling: predict the test data, then retrain with the test set (using the predicted labels) added to the training data
* Train on all of the training data (no train-val split)
* Try a different way of combining the predictions
* Fine-tune on pre-trained models (Get rid of some top layers because input size is small)

In [1]:
import numpy as np
import pandas as pd
from skimage import transform
from keras.models import Sequential, load_model
from keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Flatten
from keras import losses, optimizers, callbacks
import xgboost as xgb
from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import cross_validate, train_test_split
from scipy import fftpack
import random
import matplotlib.pyplot as plt
%matplotlib inline
# One seed shared by the stdlib and NumPy global RNGs (both are drawn from by
# the augmentation code) and passed as random_state to train_test_split.
# NOTE(review): no backend (TensorFlow) seed is set here, so weight
# initialisation is presumably still non-deterministic -- confirm.
RANDOM_SEED = 43
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)

Using TensorFlow backend.


# Create Dataset 

In [2]:
# Train
# Read the labelled training set and build one 75x75x2 array per sample
# (band_1 and band_2 stacked as channels), standardising each band of each
# image independently.
df = pd.read_json('./data/train.json')
X_train, y_train = [], []
for im_band1, im_band2, label in zip(df['band_1'], df['band_2'], df['is_iceberg']):
    # Force a float dtype: the in-place -= and /= below would fail on an
    # integer array if a band happened to be parsed as whole numbers.
    im_band1 = np.array(im_band1, dtype=np.float64).reshape(75, 75, 1)
    im_band2 = np.array(im_band2, dtype=np.float64).reshape(75, 75, 1)
    # Preprocess
    # - Zero mean (per image, per band)
    im_band1 -= np.mean(im_band1)
    im_band2 -= np.mean(im_band2)
    # - Normalize to unit standard deviation (per image, per band)
    im_band1 /= np.std(im_band1)
    im_band2 /= np.std(im_band2)
    # Stack the two bands along the channel axis -> (75, 75, 2)
    im = np.concatenate([im_band1, im_band2], axis=2)
    # Append
    X_train.append(im)
    y_train.append(label)
X_train = np.array(X_train)
y_train = np.array(y_train)
# Single-argument print(...) produces the same text on Python 2 and Python 3.
print('X_train.shape: {}'.format(X_train.shape))
print('y_train.shape: {}'.format(y_train.shape))

X_train.shape: (1604, 75, 75, 2)
y_train.shape: (1604,)


## Train - Val Split

In [3]:
# Hold out 20% of the samples for validation, seeded for repeatability.
# NOTE: X_train / y_train are overwritten with the training portion, so
# re-running this cell without re-running the loading cell splits the
# already-reduced training set again.
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=RANDOM_SEED)
# Single-argument print(...) produces the same text on Python 2 and Python 3.
print('X_train.shape: {}'.format(X_train.shape))
print('X_val.shape: {}'.format(X_val.shape))
print('y_train.shape: {}'.format(y_train.shape))
print('y_val.shape: {}'.format(y_val.shape))

X_train.shape: (1283, 75, 75, 2)
X_val.shape: (321, 75, 75, 2)
y_train.shape: (1283,)
y_val.shape: (321,)


## Data Augmentation

In [4]:
def bypass(im):
    """Identity transform: hand back the very same object that was passed in."""
    return im

def h_flip(im):
    """Mirror a single image left-to-right by reversing its second axis."""
    backwards = slice(None, None, -1)
    return im[:, backwards]

def v_flip(x):
    """Mirror a single image top-to-bottom by reversing its first axis.

    The body now indexes the argument `x`. It previously indexed the name
    `im`, which is not a parameter here, so it either raised NameError or
    flipped whatever global `im` happened to exist instead of the input.
    """
    return x[::-1, :]

def hv_flip(x):
    """Apply v_flip and then h_flip to `x` and return the result."""
    rows_reversed = v_flip(x)
    return h_flip(rows_reversed)

def random_rot(im):
    """Rotate `im` by a random whole number of degrees in [0, 359].

    The angle is drawn from NumPy's global RNG; pixels that fall outside the
    source image are filled using skimage's 'reflect' mode.
    """
    degrees = int(360.0 * np.random.rand())
    return transform.rotate(im, degrees, mode='reflect')

# Pool of per-image transforms passed to data_generator as `aug_funcs`.
aug_funcs = [
    h_flip,
    v_flip,
    random_rot,
]

In [5]:
def data_generator(X, y, aug_funcs, batch_size=32, augment=True):
    """
    Endlessly yield random (X_batch, y_batch) mini-batches.

    Parameters
    ----------
    X, y : array-like
        Samples and their labels, paired by position along the first axis.
        Both are copied once up front, so the caller's arrays are not touched.
    aug_funcs : sequence of callables
        Per-sample transforms. For every sample a random subset (possibly
        empty) is applied in random order.
    batch_size : int
        Samples per batch, drawn without replacement within a batch.
        `random.sample` raises ValueError if it exceeds the number of samples.
    augment : bool
        When False the samples are yielded untouched. (This flag used to be
        accepted but ignored.)

    Yields
    ------
    (np.ndarray, np.ndarray)
        The stacked samples and the matching labels.
    """
    # Materialise the pairs: random.sample needs a real sequence, and zip()
    # only returns a list on Python 2.
    samples = list(zip(np.copy(X), np.copy(y)))
    while True:
        batch = random.sample(samples, batch_size)
        X_batch = [sample for sample, _ in batch]
        y_batch = [target for _, target in batch]
        if augment:
            # Random augmentation
            for i, sample in enumerate(X_batch):
                n_funcs = np.random.randint(len(aug_funcs) + 1)
                for func in random.sample(aug_funcs, n_funcs):
                    sample = func(sample)
                X_batch[i] = sample
        yield np.array(X_batch), np.array(y_batch)

# Training

In [6]:
def get_model(input_shape):
    """Build and compile the convolutional network.

    Layout: four 3x3 ReLU convolutions (32, 64, 128 and 256 filters) with
    2x2 max-pooling and 25% dropout after the second, third and fourth,
    followed by a 128-unit ReLU dense layer, 50% dropout and a single
    sigmoid output unit.

    Parameters
    ----------
    input_shape : tuple
        Passed to the first Conv2D layer as `input_shape`.

    Returns
    -------
    A Sequential model compiled with binary cross-entropy loss, a
    default-configured Adam optimizer and the accuracy metric.
    """
    model = Sequential()
    layer_stack = [
        Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
        Conv2D(128, (3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
        Conv2D(256, (3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ]
    for layer in layer_stack:
        model.add(layer)

    model.compile(loss=losses.binary_crossentropy,
                  optimizer=optimizers.Adam(),
                  metrics=['accuracy'])
    return model

In [7]:
# Fresh, untrained network for 75x75 inputs with two channels.
model = get_model((75, 75, 2))

In [8]:
# Callbacks
# All three callbacks watch the validation loss:
# - ModelCheckpoint keeps only the best model seen so far at MODEL_PATH,
# - EarlyStopping gives up after 8 epochs without improvement,
# - ReduceLROnPlateau multiplies the learning rate by 0.33 after 2 such epochs.
MODEL_PATH = './models/model5.h5'
m_q = 'val_loss'
model_path = MODEL_PATH
check_pt = callbacks.ModelCheckpoint(filepath=model_path, monitor=m_q, save_best_only=True, verbose=1)
early_stop = callbacks.EarlyStopping(patience=8, monitor=m_q, verbose=1)
reduce_lr = callbacks.ReduceLROnPlateau(patience=2, factor=0.33, monitor=m_q, verbose=1)
callback_list = [check_pt, early_stop, reduce_lr]

# Fit generator
batch_size = 32
# The generator never ends, so an "epoch" is defined as 8x the number of full
# batches in the training set. Floor division (//) gives the same value as the
# old Python 2 integer "/" while keeping the step count an int when "/" is
# true division.
steps_per_epoch = (len(X_train) // batch_size) * 8
gen_obj = data_generator(X_train, y_train, batch_size=batch_size, aug_funcs=aug_funcs)
model.fit_generator(gen_obj, steps_per_epoch=steps_per_epoch, validation_data=(X_val, y_val), callbacks=callback_list, epochs=200)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200

Epoch 00009: reducing learning rate to 0.000330000015674.
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200

Epoch 00015: reducing learning rate to 0.000108900003252.
Epoch 17/200
Epoch 18/200

Epoch 00017: reducing learning rate to 3.59369999205e-05.
Epoch 19/200
Epoch 20/200

Epoch 00019: reducing learning rate to 1.18592095896e-05.
Epoch 21/200
Epoch 22/200

Epoch 00021: reducing learning rate to 3.91353921259e-06.
Epoch 00021: early stopping


<keras.callbacks.History at 0x7f3cb0bd9890>

In [9]:
# Callbacks
# Continues training whatever `model` currently holds (no model is built or
# loaded in this cell) and checkpoints the best result to a separate file.
# All three callbacks watch the validation loss:
# - ModelCheckpoint keeps only the best model seen so far at MODEL_PATH,
# - EarlyStopping gives up after 8 epochs without improvement,
# - ReduceLROnPlateau multiplies the learning rate by 0.33 after 2 such epochs.
MODEL_PATH = './models/model5-i3.h5'
m_q = 'val_loss'
model_path = MODEL_PATH
check_pt = callbacks.ModelCheckpoint(filepath=model_path, monitor=m_q, save_best_only=True, verbose=1)
early_stop = callbacks.EarlyStopping(patience=8, monitor=m_q, verbose=1)
reduce_lr = callbacks.ReduceLROnPlateau(patience=2, factor=0.33, monitor=m_q, verbose=1)
callback_list = [check_pt, early_stop, reduce_lr]

# Fit generator
batch_size = 32
# The generator never ends, so an "epoch" is defined as 8x the number of full
# batches in the training set. Floor division (//) gives the same value as the
# old Python 2 integer "/" while keeping the step count an int when "/" is
# true division.
steps_per_epoch = (len(X_train) // batch_size) * 8
gen_obj = data_generator(X_train, y_train, batch_size=batch_size, aug_funcs=aug_funcs)
model.fit_generator(gen_obj, steps_per_epoch=steps_per_epoch, validation_data=(X_val, y_val), callbacks=callback_list, epochs=200)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200

Epoch 00006: reducing learning rate to 1.29146797917e-06.
Epoch 8/200
Epoch 9/200

Epoch 00008: reducing learning rate to 4.26184415119e-07.
Epoch 10/200
Epoch 11/200

Epoch 00010: reducing learning rate to 1.40640856614e-07.
Epoch 12/200
Epoch 13/200

Epoch 00012: reducing learning rate to 4.64114805254e-08.
Epoch 00012: early stopping


<keras.callbacks.History at 0x7f3cb0bd97d0>

In [10]:
# Callbacks
# Restart from the checkpoint saved at './models/model5.h5' and train it
# further, checkpointing the best result to a separate file.
model = load_model('./models/model5.h5')
# All three callbacks watch the validation loss:
# - ModelCheckpoint keeps only the best model seen so far at MODEL_PATH,
# - EarlyStopping gives up after 8 epochs without improvement,
# - ReduceLROnPlateau multiplies the learning rate by 0.33 after 2 such epochs.
MODEL_PATH = './models/model5-i2.h5'
m_q = 'val_loss'
model_path = MODEL_PATH
check_pt = callbacks.ModelCheckpoint(filepath=model_path, monitor=m_q, save_best_only=True, verbose=1)
early_stop = callbacks.EarlyStopping(patience=8, monitor=m_q, verbose=1)
reduce_lr = callbacks.ReduceLROnPlateau(patience=2, factor=0.33, monitor=m_q, verbose=1)
callback_list = [check_pt, early_stop, reduce_lr]

# Fit generator
batch_size = 32
# The generator never ends, so an "epoch" is defined as 8x the number of full
# batches in the training set. Floor division (//) gives the same value as the
# old Python 2 integer "/" while keeping the step count an int when "/" is
# true division.
steps_per_epoch = (len(X_train) // batch_size) * 8
gen_obj = data_generator(X_train, y_train, batch_size=batch_size, aug_funcs=aug_funcs)
model.fit_generator(gen_obj, steps_per_epoch=steps_per_epoch, validation_data=(X_val, y_val), callbacks=callback_list, epochs=200)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200

Epoch 00004: reducing learning rate to 0.000108900003252.
Epoch 6/200
Epoch 7/200

Epoch 00006: reducing learning rate to 3.59369999205e-05.
Epoch 8/200
Epoch 9/200

Epoch 00008: reducing learning rate to 1.18592095896e-05.
Epoch 10/200
Epoch 11/200

Epoch 00010: reducing learning rate to 3.91353921259e-06.
Epoch 00010: early stopping


<keras.callbacks.History at 0x7f3ca411e510>

In [None]:
# # Finetune
# loss = losses.binary_crossentropy
# optimizer = optimizers.SGD(lr=1e-4)
# metrics = ['accuracy']
# model.compile(loss=loss, optimizer=optimizer, metrics=metrics)

In [11]:
# m_q = 'val_loss'
# model_path = MODEL_PATH
# check_pt = callbacks.ModelCheckpoint(filepath=model_path, monitor=m_q, save_best_only=True, verbose=1)
# early_stop = callbacks.EarlyStopping(patience=5, monitor=m_q, verbose=1)
# reduce_lr = callbacks.ReduceLROnPlateau(patience=2, factor=0.33, monitor=m_q, verbose=1)
# callback_list = [check_pt, early_stop, reduce_lr]

# model.fit(X_train, y_train, validation_data=(X_val, y_val), callbacks=callback_list, epochs=200)

# Predict Test

In [2]:
# Load test data
# Same per-image, per-band standardisation as the training cell. Only the two
# bands are read, so no label list is built here.
# NOTE: this rebinds `df` to the test frame; the submission cell reads
# df['id'] from it.
df = pd.read_json('./data/test.json')
X_test = []
for im_band1, im_band2 in zip(df['band_1'], df['band_2']):
    # Force a float dtype: the in-place -= and /= below would fail on an
    # integer array if a band happened to be parsed as whole numbers.
    im_band1 = np.array(im_band1, dtype=np.float64).reshape(75, 75, 1)
    im_band2 = np.array(im_band2, dtype=np.float64).reshape(75, 75, 1)
    # Preprocess - zero mean
    im_band1 -= np.mean(im_band1)
    im_band2 -= np.mean(im_band2)
    # Preprocess - normalize
    im_band1 /= np.std(im_band1)
    im_band2 /= np.std(im_band2)
    # Stack the two bands along the channel axis -> (75, 75, 2)
    im = np.concatenate([im_band1, im_band2], axis=2)
    X_test.append(im)
X_test = np.array(X_test)
# Single-argument print(...) produces the same text on Python 2 and Python 3.
print('X_test.shape: {}'.format(X_test.shape))

X_test.shape: (8424, 75, 75, 2)


In [12]:
# Load the model
# Rebinds `model` to the checkpoint stored at './models/model5.h5', so the
# predictions below come from that file and not from the 'model5-i2' /
# 'model5-i3' checkpoints written by the later training cells.
MODEL_PATH = './models/model5.h5'
model = load_model(MODEL_PATH)

In [13]:
# predict
# Single forward pass over the unaugmented test images. The TTA cell further
# down assigns y_test_p again, so this result is replaced if that cell runs.
y_test_p = model.predict(X_test, verbose=1)



In [14]:
# predict - tta
def bypass(x):
    """Identity transform for TTA: return the batch exactly as given."""
    return x

def h_flip(x):
    """Reverse the third axis of a 4-D batch, leaving the other axes alone.

    NOTE: this replaces the per-image h_flip defined earlier in the notebook;
    this version indexes four axes.
    """
    keep = slice(None)
    backwards = slice(None, None, -1)
    return x[keep, keep, backwards, keep]

def v_flip(x):
    """Reverse the second axis of a 4-D batch, leaving the other axes alone.

    NOTE: this replaces the per-image v_flip defined earlier in the notebook;
    this version indexes four axes.
    """
    keep = slice(None)
    backwards = slice(None, None, -1)
    return x[keep, backwards, keep, keep]

def hv_flip(x):
    """Apply v_flip and then h_flip to the batch `x` and return the result."""
    rows_reversed = v_flip(x)
    return h_flip(rows_reversed)

def _rotate_batch(x, angle, **rotate_kwargs):
    """Rotate every image in the batch `x` by `angle` degrees.

    Each image is passed to skimage's transform.rotate (extra keyword
    arguments are forwarded unchanged) and the results are re-stacked along a
    new leading axis, in the original order.
    """
    rotated = [np.expand_dims(transform.rotate(im, angle, **rotate_kwargs), axis=0) for im in x]
    return np.concatenate(rotated, axis=0)


def rot90(x):
    """Rotate each image in the batch by 90 degrees (rotate's default fill mode)."""
    return _rotate_batch(x, 90)


def rot180(x):
    """Rotate each image in the batch by 180 degrees (rotate's default fill mode)."""
    return _rotate_batch(x, 180)


def rot270(x):
    """Rotate each image in the batch by 270 degrees (rotate's default fill mode)."""
    return _rotate_batch(x, 270)


def rot45(x):
    """Rotate each image in the batch by 45 degrees, filling with mode='reflect'."""
    return _rotate_batch(x, 45, mode='reflect')


def rot135(x):
    """Rotate each image in the batch by 135 degrees, filling with mode='reflect'."""
    return _rotate_batch(x, 135, mode='reflect')


def rot315(x):
    """Rotate each image in the batch by 315 degrees, filling with mode='reflect'."""
    return _rotate_batch(x, 315, mode='reflect')

# Batch-level transforms used for test-time augmentation: the untouched
# images, three flips, and six fixed rotations.
tta_aug_funcs = [
    bypass,
    h_flip, v_flip, hv_flip,
    rot90, rot180, rot270,
    rot45, rot135, rot315,
]

# Predict once per transform (in list order) and take the plain average of
# the flattened outputs; the sum starts from 0.
y_test_p = sum(model.predict(func(X_test), verbose=1).flatten() for func in tta_aug_funcs)
y_test_p = y_test_p / len(tta_aug_funcs)



In [15]:
# Assemble the submission: ids taken from the currently loaded `df`, paired
# with the flattened predictions, written without the index column.
df_sub = pd.DataFrame(
    {'id': df['id'], 'is_iceberg': y_test_p.flatten()},
    columns=['id', 'is_iceberg'],
)
df_sub.to_csv('./submissions/sub13.csv', index=False)