Ideas:

* Add FFT channels to CNN (Tried, does not make much difference)
* Fine-tune the CNN (SGD with a low learning rate)
* 5-fold CNN
* Extract Features from CNN (before FC) and do XGB
* TTA (tried, made it better)
* More augmentation: additional rotations of 45, 135, and 315 degrees
* More augmentation: random rotations and flips
* Predict the test data, then add those predictions to the training set (pseudo-labeling)
* Train on all of the training data (no train-val split)
* Try a different way of combining the predictions
* Fine-tune pre-trained models (drop some of the top layers because the input size is small)

In this notebook, confident test-set predictions are added to the training data.

In [1]:
import numpy as np
import pandas as pd
from skimage import transform
from keras.models import Sequential, load_model
from keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Flatten, LeakyReLU
from keras import losses, optimizers, callbacks
from keras import regularizers
import xgboost as xgb
from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline
from scipy.signal import fftconvolve
from sklearn.model_selection import cross_validate, train_test_split, StratifiedKFold, KFold
from scipy import fftpack
import matplotlib.pyplot as plt
%matplotlib inline
# One seed for NumPy's global RNG (seeded on the next line) and for the
# StratifiedKFold split further down, which receives it as random_state.
# NOTE(review): no TensorFlow/Keras seed is set in the visible cells, so weight
# initialisation and dropout are presumably not covered by this seed.
RANDOM_SEED = 43
np.random.seed(RANDOM_SEED)

Using TensorFlow backend.


# Create Dataset 

In [2]:
# Pseudo-labelling: test rows whose predicted probability (read from an earlier
# submission file) is confidently close to 0 or 1 are appended to the labelled
# training rows. Their 'is_iceberg' value stays the predicted probability
# (a soft label), not a hard 0/1.

# Probability thresholds for accepting a test prediction as a pseudo-label.
ICEBERG_MIN_PROB = 0.9       # keep predictions in [0.9, 1.0] as "iceberg"
NOT_ICEBERG_MAX_PROB = 0.1   # keep predictions in [0.0, 0.1] as "not iceberg"

# Train
df_train = pd.read_json('./data/train.json')

# Test, joined on 'id' with the predictions of the earlier submission
df_test = pd.read_json('./data/test.json')
df_test_labels = pd.read_csv('./submissions/ourbest_explorestack_2.csv')
df_test = pd.merge(df_test, df_test_labels, on='id')
iceberg_cond = (df_test['is_iceberg'] >= ICEBERG_MIN_PROB) & (df_test['is_iceberg'] <= 1.0)
not_iceberg_cond = (df_test['is_iceberg'] >= 0.0) & (df_test['is_iceberg'] <= NOT_ICEBERG_MAX_PROB)
df_test = df_test[iceberg_cond | not_iceberg_cond]

# Merge Train and Test.
# ignore_index=True gives the combined frame a fresh 0..n-1 index instead of
# carrying over the overlapping row labels of the two source frames.
df = pd.concat([df_train, df_test], ignore_index=True)
print(len(df))

4927


In [3]:
def normalize(im):
    """Standardize an array: subtract its mean, then divide by its standard deviation.

    Both statistics are taken over every element of ``im``. The input is not
    modified; a new array is returned. A constant input has zero standard
    deviation, so the division is by zero in that case.
    """
    centered = im - np.mean(im)
    return centered / np.std(centered)

def get_convolve(im1, im2):
    """Return the standardized cross-correlation map of two images.

    Each image is mean-centred, then ``im1`` is FFT-convolved with ``im2``
    reversed along its first two axes (convolution with a flipped kernel is
    cross-correlation). ``mode='same'`` keeps the output the size of ``im1``.
    The result is passed through ``normalize`` before being returned.
    """
    centered_first = im1 - np.mean(im1)
    centered_second = im2 - np.mean(im2)
    flipped_second = centered_second[::-1, ::-1]
    correlation = fftconvolve(centered_first, flipped_second, mode='same')
    return normalize(correlation)

In [4]:
# Turn every row of `df` into a (75, 75, 2) image plus its label.
X, y = [], []
for im_band1, im_band2, label in zip(df['band_1'], df['band_2'], df['is_iceberg']):
    # Each band is stored flat; view it as a single-channel 75x75 image.
    im_band1 = np.array(im_band1).reshape(75, 75, 1)
    im_band2 = np.array(im_band2).reshape(75, 75, 1)
    # Preprocess each band independently, in place:
    # zero mean first, then unit standard deviation.
    for band in (im_band1, im_band2):
        band -= np.mean(band)
        band /= np.std(band)
    # Stack the two bands along the channel axis.
    im = np.concatenate([im_band1, im_band2], axis=2)
    # Disabled three-channel variant (adds the band cross-correlation):
#     im = np.concatenate([normalize(im_band1), normalize(im_band2), get_convolve(im_band1, im_band2)], axis=2)
    X.append(im)
    y.append(label)
X = np.array(X)
y = np.array(y)
print('X.shape: %s' % (X.shape,))
print('y.shape: %s' % (y.shape,))

X.shape: (4927, 75, 75, 2)
y.shape: (4927,)


## Train - Val Split

In [5]:
# Number of stratified folds.
N_SPLITS = 9
# 1-based index of the fold this run trains on; it selects cv[MODEL_NUMBER - 1]
# below and is also embedded in the checkpoint file name.
MODEL_NUMBER = 9
skf = StratifiedKFold(n_splits=N_SPLITS, random_state=RANDOM_SEED, shuffle=True)
# y mixes the training labels with pseudo-label probabilities taken from the
# test predictions, so it is thresholded at 0.5 to get the binary classes
# used for stratification.
cv = list(skf.split(X, y > 0.5))

In [6]:
# Pick the index arrays of this model's fold (MODEL_NUMBER is 1-based).
train_i, val_i = cv[MODEL_NUMBER - 1]
X_train, X_val = X[train_i], X[val_i]
y_train, y_val = y[train_i], y[val_i]

# Shapes of the two splits
print('X_train.shape: %s' % (X_train.shape,))
print('y_train.shape: %s' % (y_train.shape,))
print('X_val.shape: %s' % (X_val.shape,))
print('y_val.shape: %s' % (y_val.shape,))
# Mean label of each split (should be close, since the folds are stratified)
print('np.mean(y_train): %s' % np.mean(y_train))
print('np.mean(y_val): %s' % np.mean(y_val))

X_train.shape: (4380, 75, 75, 2)
y_train.shape: (4380,)
X_val.shape: (547, 75, 75, 2)
y_val.shape: (547,)
np.mean(y_train): 0.430118419441
np.mean(y_val): 0.430064447715


In [7]:
# X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=RANDOM_SEED)
# print 'X_train.shape:', X_train.shape
# print 'X_val.shape:', X_val.shape
# print 'y_train.shape:', y_train.shape
# print 'y_val.shape:', y_val.shape

## Data Augmentation

In [2]:
def bypass(x):
    """Identity augmentation: return the batch unchanged (the same object, not a copy)."""
    return x

def h_flip(x):
    """Mirror a batch of images horizontally.

    Reverses axis 2 of a 4-D array (the column axis of the
    (batch, row, column, channel) arrays built in this notebook) and leaves
    the other axes untouched. The result comes from basic slicing, so for a
    NumPy array it is a view of the input.
    """
    keep = slice(None)
    backwards = slice(None, None, -1)
    return x[keep, keep, backwards, keep]

def v_flip(x):
    """Mirror a batch of images vertically.

    Reverses axis 1 of a 4-D array (the row axis of the
    (batch, row, column, channel) arrays built in this notebook) and leaves
    the other axes untouched. The result comes from basic slicing, so for a
    NumPy array it is a view of the input.
    """
    keep = slice(None)
    backwards = slice(None, None, -1)
    return x[keep, backwards, keep, keep]

def hv_flip(x):
    """Mirror a batch of images both vertically and horizontally.

    Applies ``v_flip`` and then ``h_flip``; reversing both spatial axes is
    the same as turning each image by 180 degrees.
    """
    rows_reversed = v_flip(x)
    return h_flip(rows_reversed)

def rot90(x):
    """Rotate every image of a batch by 90 degrees counter-clockwise.

    Parameters
    ----------
    x : array-like with at least 3 dimensions, (batch, row, column, ...)
        Batch of images; axes 1 and 2 are the spatial axes.

    Returns
    -------
    numpy.ndarray
        The batch with axes 1 and 2 rotated by a quarter turn. The dtype of
        the input is preserved and, for ndarray input, the result is a view.

    Notes
    -----
    A quarter turn is an exact permutation of pixels, so it is done with
    ``np.rot90`` rather than a per-image interpolating warp: no round-off,
    no Python loop, and an empty batch is handled. For the square images
    used in this notebook the output shape equals the input shape; for a
    non-square image the two spatial dimensions swap.
    """
    batch = np.asarray(x)
    # k=1 is one counter-clockwise quarter turn in the (row, column) plane.
    return np.rot90(batch, k=1, axes=(1, 2))

def rot180(x):
    """Rotate every image of a batch by 180 degrees.

    Parameters
    ----------
    x : array-like with at least 3 dimensions, (batch, row, column, ...)
        Batch of images; axes 1 and 2 are the spatial axes.

    Returns
    -------
    numpy.ndarray
        The batch with axes 1 and 2 rotated by a half turn (both spatial
        axes reversed). The dtype of the input is preserved and, for ndarray
        input, the result is a view.

    Notes
    -----
    A half turn is an exact permutation of pixels, so it is done with
    ``np.rot90`` rather than a per-image interpolating warp: no round-off,
    no Python loop, and an empty batch is handled.
    """
    batch = np.asarray(x)
    # k=2 is two quarter turns in the (row, column) plane.
    return np.rot90(batch, k=2, axes=(1, 2))

def rot270(x):
    """Rotate every image of a batch by 270 degrees counter-clockwise.

    This is the same as a 90 degree clockwise turn.

    Parameters
    ----------
    x : array-like with at least 3 dimensions, (batch, row, column, ...)
        Batch of images; axes 1 and 2 are the spatial axes.

    Returns
    -------
    numpy.ndarray
        The batch with axes 1 and 2 rotated by three quarter turns. The dtype
        of the input is preserved and, for ndarray input, the result is a view.

    Notes
    -----
    The turn is an exact permutation of pixels, so it is done with
    ``np.rot90`` rather than a per-image interpolating warp: no round-off,
    no Python loop, and an empty batch is handled. For the square images
    used in this notebook the output shape equals the input shape; for a
    non-square image the two spatial dimensions swap.
    """
    batch = np.asarray(x)
    # k=3 is three counter-clockwise quarter turns in the (row, column) plane.
    return np.rot90(batch, k=3, axes=(1, 2))

def rot45(x):
    """Rotate every image of a batch by 45 degrees.

    Each image goes through ``transform.rotate`` with ``mode='reflect'``,
    which decides how the area outside the source image is filled. The
    rotated images are joined back into one batch along axis 0.
    """
    rotated = []
    for im in x:
        turned = transform.rotate(im, 45, mode='reflect')
        rotated.append(np.expand_dims(turned, axis=0))
    return np.concatenate(rotated, axis=0)

def rot135(x):
    """Rotate every image of a batch by 135 degrees.

    Each image goes through ``transform.rotate`` with ``mode='reflect'``,
    which decides how the area outside the source image is filled. The
    rotated images are joined back into one batch along axis 0.
    """
    rotated = []
    for im in x:
        turned = transform.rotate(im, 135, mode='reflect')
        rotated.append(np.expand_dims(turned, axis=0))
    return np.concatenate(rotated, axis=0)

def rot315(x):
    """Rotate every image of a batch by 315 degrees.

    Each image goes through ``transform.rotate`` with ``mode='reflect'``,
    which decides how the area outside the source image is filled. The
    rotated images are joined back into one batch along axis 0.
    """
    rotated = []
    for im in x:
        turned = transform.rotate(im, 315, mode='reflect')
        rotated.append(np.expand_dims(turned, axis=0))
    return np.concatenate(rotated, axis=0)

# Augmentations applied to the training and validation splits, and reused for
# test-time augmentation when predicting.
# NOTE(review): hv_flip reverses both spatial axes, which is the same
# transformation as a 180 degree rotation, so hv_flip and rot180 contribute
# (near-)duplicate samples — confirm this double weighting is intended.
# rot45 / rot135 / rot315 are defined above but are not part of this list.
aug_funcs = [bypass, 
             h_flip, v_flip, hv_flip,
             rot90, rot180, rot270]

In [9]:
# Expand both splits with every function in aug_funcs, stacking the augmented
# copies along the batch axis and repeating the labels once per function.
# NOTE: this cell overwrites X_train / y_train / X_val / y_val, so running it a
# second time augments the already-augmented arrays again.
n_aug = len(aug_funcs)

# Train
X_train = np.concatenate([func(X_train) for func in aug_funcs], axis=0)
y_train = np.concatenate([y_train] * n_aug)

# Validation (augmented in the same way as the training split)
X_val = np.concatenate([func(X_val) for func in aug_funcs], axis=0)
y_val = np.concatenate([y_val] * n_aug)

# Shapes after augmentation
print('X_train.shape: %s' % (X_train.shape,))
print('y_train.shape: %s' % (y_train.shape,))
print('X_val.shape: %s' % (X_val.shape,))
print('y_val.shape: %s' % (y_val.shape,))

X_train.shape: (30660, 75, 75, 2)
y_train.shape: (30660,)
X_val.shape: (3829, 75, 75, 2)
y_val.shape: (3829,)


# Training

In [10]:
def get_model(input_shape):
    """Build and compile the CNN binary classifier.

    Architecture, in order:
      * Block 1: 32-filter 3x3 conv (ReLU), dropout 0.25 (no pooling).
      * Blocks 2-3: 64- then 128-filter 3x3 conv (ReLU), 2x2 max-pool,
        dropout 0.25.
      * Block 4: 256-filter 3x3 conv, LeakyReLU, 2x2 max-pool, dropout 0.5.
      * Head: flatten, Dense(32), LeakyReLU, dropout 0.5, and a single
        sigmoid output unit.

    The model is compiled with binary cross-entropy loss, an Adam optimizer
    created with its default arguments, and accuracy as the metric.

    Parameters
    ----------
    input_shape : tuple
        Shape of one input sample, passed to the first convolution.

    Returns
    -------
    The compiled ``Sequential`` model.
    """
    net = Sequential()

    # Block 1
    net.add(Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
    net.add(Dropout(0.25))

    # Blocks 2 and 3 share one layout and differ only in filter count
    for n_filters in (64, 128):
        net.add(Conv2D(n_filters, (3, 3), activation='relu'))
        net.add(MaxPooling2D(pool_size=(2, 2)))
        net.add(Dropout(0.25))

    # Block 4: LeakyReLU as a separate layer, heavier dropout
    net.add(Conv2D(256, (3, 3)))
    net.add(LeakyReLU())
    net.add(MaxPooling2D(pool_size=(2, 2)))
    net.add(Dropout(0.5))

    # FC head
    net.add(Flatten())
    net.add(Dense(32))
    net.add(LeakyReLU())
    net.add(Dropout(0.5))
    net.add(Dense(1, activation='sigmoid'))

    # Compile the model
    net.compile(loss=losses.binary_crossentropy,
                optimizer=optimizers.Adam(),
                metrics=['accuracy'])
    return net

In [11]:
# Fresh, untrained network for 75x75 inputs with two channels (the two
# standardized bands stacked when X was built).
model = get_model(input_shape=(75, 75, 2))

In [12]:
# Callbacks
def get_lr(epoch):
    """Draw a random learning rate for a ``LearningRateScheduler`` callback.

    The rate is sampled uniformly from [1e-7, 4e-2 + 1e-7) using NumPy's
    global RNG and then capped at 0.025, so every draw above the cap comes
    back as exactly 0.025. The value is printed before it is returned.

    Parameters
    ----------
    epoch : int
        Epoch index supplied by the scheduler; it does not affect the result.

    Returns
    -------
    float
        The learning rate, always within [1e-7, 0.025].
    """
    lr = np.random.rand() * 4e-2 + 1e-7
    # Only an upper bound is needed, so a plain min() replaces the clip call.
    lr = min(lr, 0.025)
    # Single-argument call form: same output under the Python 2 print
    # statement and the Python 3 print function.
    print('lr: %s' % lr)
    return lr
# Checkpoint file for this fold's model; the name embeds MODEL_NUMBER.
MODEL_PATH = './models/model12-v5/model' + str(MODEL_NUMBER) + '.h5'
# Quantity monitored by every callback below: the validation loss.
m_q = 'val_loss'
model_path = MODEL_PATH
# Write the model to model_path only when the monitored value improves.
check_pt = callbacks.ModelCheckpoint(filepath=model_path, monitor=m_q, save_best_only=True, verbose=1)
# Stop after 15 epochs without improvement.
early_stop = callbacks.EarlyStopping(patience=15, monitor=m_q, verbose=1)
# Multiply the learning rate by 0.33 after 2 epochs without improvement.
reduce_lr = callbacks.ReduceLROnPlateau(patience=2, factor=0.33, monitor=m_q, verbose=1)
# NOTE: schedule_lr is created here but is not part of callback_list, so the
# random schedule from get_lr is not used by this fit() call; only the
# commented-out fine-tuning cell refers to it.
schedule_lr = callbacks.LearningRateScheduler(get_lr)
callback_list = [check_pt, early_stop, reduce_lr]

# X_val / y_val were augmented alongside the training split, so the monitored
# val_loss is computed over the augmented validation images.
model.fit(X_train, y_train, validation_data=(X_val, y_val), callbacks=callback_list, epochs=200, batch_size=32)

Train on 30660 samples, validate on 3829 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200

Epoch 00006: reducing learning rate to 0.000330000015674.
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200

Epoch 00011: reducing learning rate to 0.000108900003252.
Epoch 12/200
Epoch 13/200

Epoch 00013: reducing learning rate to 3.59369999205e-05.
Epoch 14/200
Epoch 15/200

Epoch 00015: reducing learning rate to 1.18592095896e-05.
Epoch 16/200
Epoch 17/200

Epoch 00017: reducing learning rate to 3.91353921259e-06.
Epoch 18/200
Epoch 19/200

Epoch 00019: reducing learning rate to 1.29146797917e-06.
Epoch 20/200
Epoch 21/200

Epoch 00021: reducing learning rate to 4.26184415119e-07.
Epoch 22/200
Epoch 23/200

Epoch 00023: reducing learning rate to 1.40640856614e-07.
Epoch 00023: early stopping


<keras.callbacks.History at 0x7fa6d77b3310>

In [13]:
# # Finetune
# loss = losses.binary_crossentropy
# optimizer = optimizers.SGD(lr=1e-2)
# metrics = ['accuracy']
# model.compile(loss=loss, optimizer=optimizer, metrics=metrics)

In [None]:
# m_q = 'val_loss'
# model_path = MODEL_PATH
# check_pt = callbacks.ModelCheckpoint(filepath=model_path, monitor=m_q, save_best_only=True, verbose=1)
# early_stop = callbacks.EarlyStopping(patience=5, monitor=m_q, verbose=1)
# reduce_lr = callbacks.ReduceLROnPlateau(patience=2, factor=0.33, monitor=m_q, verbose=1)
# callback_list = [check_pt, early_stop, schedule_lr]

# model.fit(X_train, y_train, validation_data=(X_val, y_val), callbacks=callback_list, epochs=200)

# Predict Test

In [3]:
# Load test data and apply the same per-band standardization as used for training.
# NOTE: `df` now refers to the full test frame (it is reused for the submission ids).
df = pd.read_json('./data/test.json')
X_test, y_test = [], []
for im_band1, im_band2 in zip(df['band_1'], df['band_2']):
    # Each band is stored flat; view it as a single-channel 75x75 image.
    im_band1 = np.array(im_band1).reshape(75, 75, 1)
    im_band2 = np.array(im_band2).reshape(75, 75, 1)
    # Preprocess each band independently, in place:
    # zero mean first, then unit standard deviation.
    for band in (im_band1, im_band2):
        band -= np.mean(band)
        band /= np.std(band)
    # Stack the two bands along the channel axis.
    im = np.concatenate([im_band1, im_band2], axis=2)
    X_test.append(im)
X_test = np.array(X_test)
print('X_test.shape: %s' % (X_test.shape,))

X_test.shape: (8424, 75, 75, 2)


In [6]:
# Weighted ensemble of the nine saved fold models, each evaluated with
# test-time augmentation (one prediction per function in aug_funcs).
y_test_p = 0
# Earlier weightings that were tried:
# weights = [0.25, 0.4 / 3, 0.35, 0.4 / 3, 0.4 / 3]
# weights = [0.2, 0.18, 0.2, 0.2, 0.22]
# All nine models get the same weight; the absolute value cancels out because
# the total is divided by sum(weights) at the end.
weights = [0.2] * 9
for i, w in enumerate(weights):
    print(i)
    # Load the model (files are numbered from 1)
    MODEL_PATH = './models/model12-v5/model' + str(i + 1) + '.h5'
    model = load_model(MODEL_PATH)
    # predict - tta
    for func in aug_funcs:
        tta_pred = model.predict(func(X_test), verbose=1).flatten()
        y_test_p += tta_pred * w
# Normalize by the number of augmentations and the total weight to get a
# weighted mean probability per test sample.
y_test_p = y_test_p / (len(aug_funcs) * sum(weights))

0
1
2
3
4
5
6
7
8


In [7]:
# Assemble the submission: test ids (df is the test frame loaded above) next
# to the ensembled probabilities, written without the index column.
df_sub = pd.DataFrame({'id': df['id']})
df_sub['is_iceberg'] = y_test_p.flatten()
df_sub.to_csv('./submissions/sub31.csv', index=False)