Ideas:

* Add FFT channels to CNN (Tried, does not make much difference)
* Finetune CNN (with SGD slow learning rate)
* 5-fold CNN
* Extract Features from CNN (before FC) and do XGB
* TTA (tried, made it better)
* More augmentation: additional rotations of 45, 135, and 315 degrees
* More augmentation: random rotations and flips
* Pseudo-labeling: predict the test data, then train on the training data plus the confidently predicted test samples
* Train on all of the training data (no train-val split)
* Try a different way of combining the models' predictions
* Fine-tune pre-trained models (drop some of the top layers because the input size is small)

In this notebook: add confidently predicted test samples (pseudo-labels) to the training data.

* [Create Dataset](#Create-Dataset)
* [Train - Val Split](#Train---Val-Split)
* [Data Augmentation](#Data-Augmentation)
* [Training](#Training)
* [Predict Test](#Predict-Test)

In [1]:
import numpy as np
import pandas as pd
from skimage import transform
from keras.models import Sequential, load_model
from keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Flatten, LeakyReLU
from keras import losses, optimizers, callbacks
from keras import regularizers
import xgboost as xgb
from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline
from scipy.signal import fftconvolve
from sklearn.model_selection import cross_validate, train_test_split, StratifiedKFold, KFold
from scipy import fftpack
import matplotlib.pyplot as plt
%matplotlib inline
# Seed NumPy's global RNG so that the np.random-based shuffling done in
# get_random_split is repeatable when the notebook is run from a fresh kernel.
RANDOM_SEED = 43
np.random.seed(RANDOM_SEED)

Using TensorFlow backend.


# Create Dataset 

In [2]:
# Pseudo-labeling: the labeled training set is extended with the test images
# for which a previous submission was confident. A test row is kept only when
# its predicted probability lies in [CONFIDENT_ICEBERG_MIN, 1.0] or in
# [0.0, CONFIDENT_NOT_ICEBERG_MAX].
CONFIDENT_ICEBERG_MIN = 0.9
CONFIDENT_NOT_ICEBERG_MAX = 0.1

# Train
df_train = pd.read_json('./data/train.json')

# Test, joined on 'id' with the predictions of an earlier submission
df_test = pd.read_json('./data/test.json')
df_test_labels = pd.read_csv('./submissions/ourbest_explorestack_2.csv')
df_test = pd.merge(df_test, df_test_labels, on='id')
iceberg_cond = (df_test['is_iceberg'] >= CONFIDENT_ICEBERG_MIN) & (df_test['is_iceberg'] <= 1.0)
not_iceberg_cond = (df_test['is_iceberg'] >= 0.0) & (df_test['is_iceberg'] <= CONFIDENT_NOT_ICEBERG_MAX)
# The kept rows carry the predicted probability itself as their 'is_iceberg'
# label; it is not rounded to 0/1.
df_test = df_test[iceberg_cond | not_iceberg_cond]

# Merge Train and Test; ignore_index gives every row a unique label instead of
# repeating the index values that both source frames share.
df = pd.concat([df_train, df_test], ignore_index=True)
print(len(df))

4927


In [3]:
def normalize(im):
    """Standardize ``im`` to zero mean and unit standard deviation.

    Mean and standard deviation are computed over all elements of ``im``
    (no axis is given). A new array is returned; the argument is not modified.
    """
    centered = im - np.mean(im)
    return centered / np.std(centered)

def get_convolve(im1, im2):
    """Return the standardized FFT convolution of ``im1`` with flipped ``im2``.

    Both inputs are mean-centered first. ``im2`` is reversed along its first
    two axes before the convolution, which for 2-D inputs makes the result
    their cross-correlation. The output has the shape of ``im1``
    (``mode='same'``) and is passed through ``normalize``.
    """
    centered_1 = im1 - np.mean(im1)
    centered_2 = im2 - np.mean(im2)
    flipped_2 = centered_2[::-1, ::-1]
    correlation = fftconvolve(centered_1, flipped_2, mode='same')
    return normalize(correlation)

In [4]:
# Build the model inputs: one (75, 75, 2) array per sample, holding band_1 and
# band_2 as channels, each standardized on its own with `normalize`.
X, y = [], []
for im_band1, im_band2, label in zip(df['band_1'], df['band_2'], df['is_iceberg']):
    # Preprocess: zero mean and unit standard deviation per image and per band.
    # `normalize` returns a new float array, so this also works when a band
    # happens to be parsed as integers (in-place `-=` / `/=` would fail there).
    im_band1 = normalize(np.array(im_band1).reshape(75, 75, 1))
    im_band2 = normalize(np.array(im_band2).reshape(75, 75, 1))
    # A third channel, get_convolve(im_band1, im_band2), was also tried here
    # (see the Ideas list: the FFT channel made little difference).
    im = np.concatenate([im_band1, im_band2], axis=2)
    X.append(im)
    y.append(label)
X = np.array(X)
y = np.array(y)
print('X.shape: {}'.format(X.shape))
print('y.shape: {}'.format(y.shape))

X.shape: (4927, 75, 75, 2)
y.shape: (4927,)


In [5]:
def bypass(x):
    """Identity transform: return ``x`` unchanged (the un-augmented view)."""
    return x

def h_flip(x):
    """Reverse a 4-D batch along axis 2 (a horizontal mirror for N x H x W x C data)."""
    reverse = slice(None, None, -1)
    everything = slice(None)
    return x[everything, everything, reverse, everything]

def v_flip(x):
    """Reverse a 4-D batch along axis 1 (a vertical mirror for N x H x W x C data)."""
    reverse = slice(None, None, -1)
    everything = slice(None)
    return x[everything, reverse, everything, everything]

def hv_flip(x):
    """Apply ``v_flip`` and then ``h_flip`` to the batch ``x``."""
    flipped_vertically = v_flip(x)
    return h_flip(flipped_vertically)

def rot90(x):
    """Rotate every image of the batch ``x`` by 90 degrees.

    Each element of ``x`` goes through ``skimage.transform.rotate`` with its
    default settings; the results are stacked along a new leading axis.
    """
    rotated = [transform.rotate(im, 90) for im in x]
    return np.stack(rotated, axis=0)

def rot180(x):
    """Rotate every image of the batch ``x`` by 180 degrees.

    Each element of ``x`` goes through ``skimage.transform.rotate`` with its
    default settings; the results are stacked along a new leading axis.
    """
    rotated = [transform.rotate(im, 180) for im in x]
    return np.stack(rotated, axis=0)

def rot270(x):
    """Rotate every image of the batch ``x`` by 270 degrees.

    Each element of ``x`` goes through ``skimage.transform.rotate`` with its
    default settings; the results are stacked along a new leading axis.
    """
    rotated = [transform.rotate(im, 270) for im in x]
    return np.stack(rotated, axis=0)

def rot45(x):
    """Rotate every image of the batch ``x`` by 45 degrees.

    ``skimage.transform.rotate`` is called with ``mode='reflect'`` for each
    element of ``x``; the results are stacked along a new leading axis.
    """
    rotated = [transform.rotate(im, 45, mode='reflect') for im in x]
    return np.stack(rotated, axis=0)

def rot135(x):
    """Rotate every image of the batch ``x`` by 135 degrees.

    ``skimage.transform.rotate`` is called with ``mode='reflect'`` for each
    element of ``x``; the results are stacked along a new leading axis.
    """
    rotated = [transform.rotate(im, 135, mode='reflect') for im in x]
    return np.stack(rotated, axis=0)

def rot315(x):
    """Rotate every image of the batch ``x`` by 315 degrees.

    ``skimage.transform.rotate`` is called with ``mode='reflect'`` for each
    element of ``x``; the results are stacked along a new leading axis.
    """
    rotated = [transform.rotate(im, 315, mode='reflect') for im in x]
    return np.stack(rotated, axis=0)

# Transforms used both to augment the train/validation data and for test-time
# augmentation. The diagonal rotations (rot45, rot135, rot315) are defined
# above but not enabled here.
# NOTE(review): hv_flip and rot180 both turn an image by 180 degrees, so that
# view appears to be counted twice -- confirm this is intended.
aug_funcs = [
    bypass,
    h_flip,
    v_flip,
    hv_flip,
    rot90,
    rot180,
    rot270,
]

In [6]:
# 
def get_random_split(size, train_ratio):
    """Randomly partition the indices ``0 .. size - 1`` into two lists.

    Args:
        size: Number of samples to split.
        train_ratio: Fraction of the samples that goes to the first (train)
            part; the cutoff is ``int(round(train_ratio * size))``.

    Returns:
        A ``(train_i, test_i)`` tuple of index lists. Together they contain
        every index exactly once.

    The shuffle draws from NumPy's global RNG, so the result depends on the
    state set by ``np.random.seed``.
    """
    # np.random.shuffle works in place and needs a mutable sequence. On
    # Python 3 `range` is immutable, so materialize it as a list first
    # (on Python 2 this is just a copy of the list `range` already returns).
    indices = list(range(size))
    np.random.shuffle(indices)
    cutoff = int(round(train_ratio * size))
    return indices[:cutoff], indices[cutoff:]

In [7]:
def get_model(input_shape):
    """Build and compile the CNN binary classifier.

    Architecture: four 3x3 convolution blocks with 32, 64, 128 and 256
    filters (ReLU for the first three, LeakyReLU for the last; 2x2 max-pooling
    in blocks 2-4; dropout after every block), then a 32-unit dense layer with
    LeakyReLU and dropout, and a single sigmoid output unit.

    Args:
        input_shape: Shape of one input sample, passed to the first Conv2D.

    Returns:
        A ``Sequential`` model compiled with binary cross-entropy loss, an
        ``Adam`` optimizer created with its default arguments, and the
        accuracy metric.
    """
    layers = [
        # Block 1
        Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
        Dropout(0.25),
        # Block 2
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
        # Block 3
        Conv2D(128, (3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
        # Block 4
        Conv2D(256, (3, 3)),
        LeakyReLU(),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.5),
        # FC
        Flatten(),
        Dense(32),
        LeakyReLU(),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ]
    model = Sequential(layers)

    # Compile the model
    model.compile(loss=losses.binary_crossentropy,
                  optimizer=optimizers.Adam(),
                  metrics=['accuracy'])
    return model

# Training

In [8]:
# Parameters
N_MODELS = 30
train_ratio = 0.5
MODEL_NUMBERS = range(1, N_MODELS + 1)
MONITOR = 'val_loss'  # quantity watched by every callback below
MAX_EPOCHS = 200      # upper bound; EarlyStopping normally ends training sooner
BATCH_SIZE = 32

# Train N_MODELS networks. Each one gets its own random train/validation split
# and its best-val_loss weights are saved to ./models/model16/model<k>.h5.
for MODEL_NUMBER in MODEL_NUMBERS:
    print('*** MODEL: {}  ***'.format(MODEL_NUMBER))
    # *** Train - Val Split ***
    train_i, val_i = get_random_split(len(X), train_ratio)
    X_train, y_train = X[train_i], y[train_i]
    X_val, y_val = X[val_i], y[val_i]
    print('train_i[:5] {}'.format(train_i[:5]))
    print('val_i[:5] {}'.format(val_i[:5]))
    print('X_train.shape: {}'.format(X_train.shape))
    print('y_train.shape: {}'.format(y_train.shape))
    print('X_val.shape: {}'.format(X_val.shape))
    print('y_val.shape: {}'.format(y_val.shape))
    print('np.mean(y_train): {}'.format(np.mean(y_train)))
    print('np.mean(y_val): {}'.format(np.mean(y_val)))

    # *** Data Augmentation ***
    # Every transform in aug_funcs produces one full copy of the split, so the
    # labels are repeated once per transform, in the same order.
    # Train
    X_train = np.concatenate([func(X_train) for func in aug_funcs], axis=0)
    y_train = np.concatenate([y_train] * len(aug_funcs))

    # Validation (augmented the same way as the training part)
    X_val = np.concatenate([func(X_val) for func in aug_funcs], axis=0)
    y_val = np.concatenate([y_val] * len(aug_funcs))

    print('X_train.shape: {}'.format(X_train.shape))
    print('y_train.shape: {}'.format(y_train.shape))
    print('X_val.shape: {}'.format(X_val.shape))
    print('y_val.shape: {}'.format(y_val.shape))

    # *** Training ***
    model = get_model(input_shape=(75, 75, 2))
    # Callbacks: keep only the best checkpoint, shrink the learning rate when
    # val_loss stalls for 2 epochs, and stop after 10 epochs without improvement.
    MODEL_PATH = './models/model16/model' + str(MODEL_NUMBER) + '.h5'
    check_pt = callbacks.ModelCheckpoint(filepath=MODEL_PATH, monitor=MONITOR, save_best_only=True, verbose=1)
    early_stop = callbacks.EarlyStopping(patience=10, monitor=MONITOR, verbose=1)
    reduce_lr = callbacks.ReduceLROnPlateau(patience=2, factor=0.33, monitor=MONITOR, verbose=1)
    callback_list = [check_pt, early_stop, reduce_lr]
    # fit
    model.fit(X_train, y_train, validation_data=(X_val, y_val), callbacks=callback_list,
              epochs=MAX_EPOCHS, batch_size=BATCH_SIZE)

*** MODEL: 1  ***
train_i[:5] [4630, 1750, 2066, 4474, 4720]
val_i[:5] [2465, 2849, 2680, 1086, 2103]
X_train.shape: (2464, 75, 75, 2)
y_train.shape: (2464,)
X_val.shape: (2463, 75, 75, 2)
y_val.shape: (2463,)
np.mean(y_train): 0.433649795455
np.mean(y_val): 0.426573623244
X_train.shape: (17248, 75, 75, 2)
y_train.shape: (17248,)
X_val.shape: (17241, 75, 75, 2)
y_val.shape: (17241,)
Train on 17248 samples, validate on 17241 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200

Epoch 00010: reducing learning rate to 0.000330000015674.
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200

Epoch 00014: reducing learning rate to 0.000108900003252.
Epoch 15/200
Epoch 16/200

Epoch 00016: reducing learning rate to 3.59369999205e-05.
Epoch 17/200
Epoch 18/200

Epoch 00018: reducing learning rate to 1.18592095896e-05.
Epoch 19/200
Epoch 20/200

Epoch 00020: reducing learning rate to 3.91353921259e-06.
Epoch 21/200
Epoch

Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200

Epoch 00010: reducing learning rate to 0.000330000015674.
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200

Epoch 00016: reducing learning rate to 0.000108900003252.
Epoch 17/200
Epoch 18/200

Epoch 00018: reducing learning rate to 3.59369999205e-05.
Epoch 19/200
Epoch 20/200

Epoch 00020: reducing learning rate to 1.18592095896e-05.
Epoch 21/200
Epoch 22/200

Epoch 00022: reducing learning rate to 3.91353921259e-06.
Epoch 23/200
Epoch 00023: early stopping
*** MODEL: 3  ***
train_i[:5] [165, 1350, 2740, 378, 4834]
val_i[:5] [3246, 914, 3724, 3870, 3462]
X_train.shape: (2464, 75, 75, 2)
y_train.shape: (2464,)
X_val.shape: (2463, 75, 75, 2)
y_val.shape: (2463,)
np.mean(y_train): 0.417257916782
np.mean(y_val): 0.442972157166
X_train.shape: (17248, 75, 75, 2)
y_train.shape: (17248,)
X_val.shape: (17241, 75, 75, 2)
y_val.shape: (17241,)
Train on 17248 samples, validate on 17241 samples
Ep

Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200

Epoch 00011: reducing learning rate to 0.000330000015674.
Epoch 12/200
Epoch 13/200

Epoch 00013: reducing learning rate to 0.000108900003252.
Epoch 14/200
Epoch 15/200

Epoch 00015: reducing learning rate to 3.59369999205e-05.
Epoch 16/200
Epoch 17/200

Epoch 00017: reducing learning rate to 1.18592095896e-05.
Epoch 18/200
Epoch 00018: early stopping
*** MODEL: 4  ***
train_i[:5] [2839, 3307, 3095, 3035, 2303]
val_i[:5] [943, 785, 3850, 2102, 1582]
X_train.shape: (2464, 75, 75, 2)
y_train.shape: (2464,)
X_val.shape: (2463, 75, 75, 2)
y_val.shape: (2463,)
np.mean(y_train): 0.42529358748
np.mean(y_val): 0.434933223914
X_train.shape: (17248, 75, 75, 2)
y_train.shape: (17248,)
X_val.shape: (17241, 75, 75, 2)
y_val.shape: (17241,)
Train on 17248 samples, validate on 17241 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Ep

Epoch 17/200
Epoch 18/200
Epoch 19/200

Epoch 00019: reducing learning rate to 0.000108900003252.
Epoch 20/200
Epoch 21/200

Epoch 00021: reducing learning rate to 3.59369999205e-05.
Epoch 22/200
Epoch 23/200

Epoch 00023: reducing learning rate to 1.18592095896e-05.
Epoch 24/200
Epoch 25/200

Epoch 00025: reducing learning rate to 3.91353921259e-06.
Epoch 26/200
Epoch 00026: early stopping
*** MODEL: 5  ***
train_i[:5] [550, 4619, 894, 480, 454]
val_i[:5] [4362, 4651, 3886, 3718, 4581]
X_train.shape: (2464, 75, 75, 2)
y_train.shape: (2464,)
X_val.shape: (2463, 75, 75, 2)
y_val.shape: (2463,)
np.mean(y_train): 0.430245923072
np.mean(y_val): 0.429978877629
X_train.shape: (17248, 75, 75, 2)
y_train.shape: (17248,)
X_val.shape: (17241, 75, 75, 2)
y_val.shape: (17241,)
Train on 17248 samples, validate on 17241 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200

Epoch 00012: reducing lear

Epoch 18/200

Epoch 00018: reducing learning rate to 3.59369999205e-05.
Epoch 19/200
Epoch 20/200

Epoch 00020: reducing learning rate to 1.18592095896e-05.
Epoch 21/200
Epoch 22/200

Epoch 00022: reducing learning rate to 3.91353921259e-06.
Epoch 23/200
Epoch 24/200

Epoch 00024: reducing learning rate to 1.29146797917e-06.
Epoch 25/200
Epoch 26/200

Epoch 00026: reducing learning rate to 4.26184415119e-07.
Epoch 27/200
Epoch 28/200

Epoch 00028: reducing learning rate to 1.40640856614e-07.
Epoch 29/200
Epoch 30/200

Epoch 00030: reducing learning rate to 4.64114805254e-08.
Epoch 00030: early stopping
*** MODEL: 6  ***
train_i[:5] [4884, 33, 2555, 249, 4256]
val_i[:5] [4001, 2091, 3475, 3978, 590]
X_train.shape: (2464, 75, 75, 2)
y_train.shape: (2464,)
X_val.shape: (2463, 75, 75, 2)
y_val.shape: (2463,)
np.mean(y_train): 0.418930683117
np.mean(y_val): 0.441298711673
X_train.shape: (17248, 75, 75, 2)
y_train.shape: (17248,)
X_val.shape: (17241, 75, 75, 2)
y_val.shape: (17241,)
Train on

Epoch 14/200
Epoch 15/200

Epoch 00015: reducing learning rate to 3.59369999205e-05.
Epoch 16/200
Epoch 17/200

Epoch 00017: reducing learning rate to 1.18592095896e-05.
Epoch 18/200
Epoch 00018: early stopping
*** MODEL: 7  ***
train_i[:5] [4779, 101, 3889, 4481, 3642]
val_i[:5] [4646, 3543, 3578, 4615, 982]
X_train.shape: (2464, 75, 75, 2)
y_train.shape: (2464,)
X_val.shape: (2463, 75, 75, 2)
y_val.shape: (2463,)
np.mean(y_train): 0.42709809789
np.mean(y_val): 0.433127980857
X_train.shape: (17248, 75, 75, 2)
y_train.shape: (17248,)
X_val.shape: (17241, 75, 75, 2)
y_val.shape: (17241,)
Train on 17248 samples, validate on 17241 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200

Epoch 00006: reducing learning rate to 0.000330000015674.
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200

Epoch 00012: reducing learning rate to 0.000108900003252.
Epoch 13/200
Epoch 14/200

Epoch 00014: reducing learning rate to 3.59369999205e-05.
Epoch 

Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200

Epoch 00015: reducing learning rate to 0.000330000015674.
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200

Epoch 00019: reducing learning rate to 0.000108900003252.
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200

Epoch 00024: reducing learning rate to 3.59369999205e-05.
Epoch 25/200
Epoch 26/200

Epoch 00026: reducing learning rate to 1.18592095896e-05.
Epoch 27/200
Epoch 28/200

Epoch 00028: reducing learning rate to 3.91353921259e-06.
Epoch 29/200
Epoch 30/200



Epoch 00030: reducing learning rate to 1.29146797917e-06.
Epoch 31/200
Epoch 00031: early stopping
*** MODEL: 9  ***
train_i[:5] [3802, 3387, 157, 1873, 3865]
val_i[:5] [903, 3429, 844, 2239, 7]
X_train.shape: (2464, 75, 75, 2)
y_train.shape: (2464,)
X_val.shape: (2463, 75, 75, 2)
y_val.shape: (2463,)
np.mean(y_train): 0.428735114306
np.mean(y_val): 0.431490299797
X_train.shape: (17248, 75, 75, 2)
y_train.shape: (17248,)
X_val.shape: (17241, 75, 75, 2)
y_val.shape: (17241,)
Train on 17248 samples, validate on 17241 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200

Epoch 00007: reducing learning rate to 0.000330000015674.
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200

Epoch 00012: reducing learning rate to 0.000108900003252.
Epoch 13/200
Epoch 14/200

Epoch 00014: reducing learning rate to 3.59369999205e-05.
Epoch 15/200
Epoch 16/200

Epoch 00016: reducing learning rate to 1.18592095896e-05.
Epoch 17/200
Epoch 18/200

Epoch 00

Epoch 5/200
Epoch 6/200
Epoch 7/200

Epoch 00007: reducing learning rate to 0.000330000015674.
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200

Epoch 00012: reducing learning rate to 0.000108900003252.
Epoch 13/200
Epoch 14/200

Epoch 00014: reducing learning rate to 3.59369999205e-05.
Epoch 15/200
Epoch 16/200

Epoch 00016: reducing learning rate to 1.18592095896e-05.
Epoch 17/200
Epoch 18/200

Epoch 00018: reducing learning rate to 3.91353921259e-06.
Epoch 19/200
Epoch 00019: early stopping
*** MODEL: 11  ***
train_i[:5] [1490, 3280, 1479, 977, 90]
val_i[:5] [2542, 2968, 3590, 3314, 4500]
X_train.shape: (2464, 75, 75, 2)
y_train.shape: (2464,)
X_val.shape: (2463, 75, 75, 2)
y_val.shape: (2463,)
np.mean(y_train): 0.427161967979
np.mean(y_val): 0.433064084836
X_train.shape: (17248, 75, 75, 2)
y_train.shape: (17248,)
X_val.shape: (17241, 75, 75, 2)
y_val.shape: (17241,)
Train on 17248 samples, validate on 17241 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch

Epoch 13/200

Epoch 00013: reducing learning rate to 3.59369999205e-05.
Epoch 14/200
Epoch 15/200

Epoch 00015: reducing learning rate to 1.18592095896e-05.
Epoch 16/200
Epoch 17/200

Epoch 00017: reducing learning rate to 3.91353921259e-06.
Epoch 18/200
Epoch 00018: early stopping
*** MODEL: 12  ***
train_i[:5] [1675, 4616, 974, 283, 3124]
val_i[:5] [3288, 1009, 451, 1027, 1343]
X_train.shape: (2464, 75, 75, 2)
y_train.shape: (2464,)
X_val.shape: (2463, 75, 75, 2)
y_val.shape: (2463,)
np.mean(y_train): 0.414199931981
np.mean(y_val): 0.446031383536
X_train.shape: (17248, 75, 75, 2)
y_train.shape: (17248,)
X_val.shape: (17241, 75, 75, 2)
y_val.shape: (17241,)
Train on 17248 samples, validate on 17241 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200

Epoch 00006: reducing learning rate to 0.000330000015674.
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200

Epoch 00011: reducing learning rate to 0.000108900003252.
Epoch 12/200
Epoch 13/200

Epoc

Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200

Epoch 00012: reducing learning rate to 0.000330000015674.
Epoch 13/200
Epoch 14/200

Epoch 00014: reducing learning rate to 0.000108900003252.
Epoch 15/200
Epoch 16/200

Epoch 00016: reducing learning rate to 3.59369999205e-05.
Epoch 17/200
Epoch 18/200

Epoch 00018: reducing learning rate to 1.18592095896e-05.
Epoch 19/200
Epoch 00019: early stopping
*** MODEL: 14  ***
train_i[:5] [900, 2748, 3250, 2596, 2496]
val_i[:5] [438, 3702, 4788, 1622, 4813]
X_train.shape: (2464, 75, 75, 2)
y_train.shape: (2464,)
X_val.shape: (2463, 75, 75, 2)
y_val.shape: (2463,)
np.mean(y_train): 0.424970732589
np.mean(y_val): 0.435256209886
X_train.shape: (17248, 75, 75, 2)
y_train.shape: (17248,)
X_val.shape: (17241, 75, 75, 2)
y_val.shape: (17241,)
Train on 17248 samples, validate on 17241 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epo

Epoch 8/200

Epoch 00008: reducing learning rate to 0.000330000015674.
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200

Epoch 00016: reducing learning rate to 0.000108900003252.
Epoch 17/200
Epoch 18/200

Epoch 00018: reducing learning rate to 3.59369999205e-05.
Epoch 19/200
Epoch 20/200

Epoch 00020: reducing learning rate to 1.18592095896e-05.
Epoch 21/200
Epoch 22/200

Epoch 00022: reducing learning rate to 3.91353921259e-06.
Epoch 23/200
Epoch 00023: early stopping
*** MODEL: 15  ***
train_i[:5] [4006, 4206, 1093, 874, 323]
val_i[:5] [4223, 2845, 4523, 1862, 4487]
X_train.shape: (2464, 75, 75, 2)
y_train.shape: (2464,)
X_val.shape: (2463, 75, 75, 2)
y_val.shape: (2463,)
np.mean(y_train): 0.427977871124
np.mean(y_val): 0.432247850426
X_train.shape: (17248, 75, 75, 2)
y_train.shape: (17248,)
X_val.shape: (17241, 75, 75, 2)
y_val.shape: (17241,)
Train on 17248 samples, validate on 17241 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200


Epoch 12/200
Epoch 13/200
Epoch 14/200

Epoch 00014: reducing learning rate to 0.000108900003252.
Epoch 15/200
Epoch 16/200

Epoch 00016: reducing learning rate to 3.59369999205e-05.
Epoch 17/200
Epoch 18/200

Epoch 00018: reducing learning rate to 1.18592095896e-05.
Epoch 19/200
Epoch 20/200

Epoch 00020: reducing learning rate to 3.91353921259e-06.
Epoch 21/200
Epoch 00021: early stopping
*** MODEL: 16  ***
train_i[:5] [3450, 4147, 3201, 1158, 2941]
val_i[:5] [753, 917, 3903, 3228, 31]
X_train.shape: (2464, 75, 75, 2)
y_train.shape: (2464,)
X_val.shape: (2463, 75, 75, 2)
y_val.shape: (2463,)
np.mean(y_train): 0.437327939367
np.mean(y_val): 0.422893985972
X_train.shape: (17248, 75, 75, 2)
y_train.shape: (17248,)
X_val.shape: (17241, 75, 75, 2)
y_val.shape: (17241,)
Train on 17248 samples, validate on 17241 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200

Epoch 00009: reducing learning rate to 0.000330000015674.
Epoch 


Epoch 00018: reducing learning rate to 1.18592095896e-05.
Epoch 19/200
Epoch 20/200

Epoch 00020: reducing learning rate to 3.91353921259e-06.
Epoch 21/200
Epoch 00021: early stopping
*** MODEL: 17  ***
train_i[:5] [1146, 4321, 2780, 1084, 3364]
val_i[:5] [3295, 1447, 2251, 2442, 756]
X_train.shape: (2464, 75, 75, 2)
y_train.shape: (2464,)
X_val.shape: (2463, 75, 75, 2)
y_val.shape: (2463,)
np.mean(y_train): 0.430913757163
np.mean(y_val): 0.429310772391
X_train.shape: (17248, 75, 75, 2)
y_train.shape: (17248,)
X_val.shape: (17241, 75, 75, 2)
y_val.shape: (17241,)
Train on 17248 samples, validate on 17241 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200

Epoch 00016: reducing learning rate to 0.000330000015674.
Epoch 17/200
Epoch 18/200

Epoch 00018: reducing learning rate to 0.000108900003252.
Epoch 19/200
Epoch 20/200
Epoch 21/2

Epoch 24/200

Epoch 00024: reducing learning rate to 3.59369999205e-05.
Epoch 25/200
Epoch 26/200

Epoch 00026: reducing learning rate to 1.18592095896e-05.
Epoch 27/200
Epoch 28/200

Epoch 00028: reducing learning rate to 3.91353921259e-06.
Epoch 29/200
Epoch 30/200

Epoch 00030: reducing learning rate to 1.29146797917e-06.
Epoch 31/200
Epoch 00031: early stopping
*** MODEL: 18  ***
train_i[:5] [4198, 270, 65, 348, 2385]
val_i[:5] [2459, 4081, 3565, 3411, 2488]
X_train.shape: (2464, 75, 75, 2)
y_train.shape: (2464,)
X_val.shape: (2463, 75, 75, 2)
y_val.shape: (2463,)
np.mean(y_train): 0.421280392512
np.mean(y_val): 0.438948048274
X_train.shape: (17248, 75, 75, 2)
y_train.shape: (17248,)
X_val.shape: (17241, 75, 75, 2)
y_val.shape: (17241,)
Train on 17248 samples, validate on 17241 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200

Epoch 00010: reducing learning rate to 0.000330000015674.
Epoch 11/200
Epoch 

Epoch 19/200
Epoch 20/200

Epoch 00020: reducing learning rate to 1.18592095896e-05.
Epoch 21/200
Epoch 22/200

Epoch 00022: reducing learning rate to 3.91353921259e-06.
Epoch 23/200
Epoch 00023: early stopping
*** MODEL: 19  ***
train_i[:5] [1258, 3838, 4789, 4264, 256]
val_i[:5] [3297, 4350, 2422, 1407, 3151]
X_train.shape: (2464, 75, 75, 2)
y_train.shape: (2464,)
X_val.shape: (2463, 75, 75, 2)
y_val.shape: (2463,)
np.mean(y_train): 0.439810358949
np.mean(y_val): 0.420410558506
X_train.shape: (17248, 75, 75, 2)
y_train.shape: (17248,)
X_val.shape: (17241, 75, 75, 2)
y_val.shape: (17241,)
Train on 17248 samples, validate on 17241 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200

Epoch 00005: reducing learning rate to 0.000330000015674.
Epoch 6/200
Epoch 7/200

Epoch 00007: reducing learning rate to 0.000108900003252.
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200

Epoch 00011: reducing learning rate to 3.59369999205e-05.
Epoch 12/200
Epoch 13/200

Epoch 00013: re

Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200

Epoch 00008: reducing learning rate to 0.000330000015674.
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200

Epoch 00013: reducing learning rate to 0.000108900003252.
Epoch 14/200
Epoch 15/200

Epoch 00015: reducing learning rate to 3.59369999205e-05.
Epoch 16/200
Epoch 17/200

Epoch 00017: reducing learning rate to 1.18592095896e-05.
Epoch 18/200
Epoch 19/200

Epoch 00019: reducing learning rate to 3.91353921259e-06.
Epoch 20/200
Epoch 00020: early stopping
*** MODEL: 21  ***
train_i[:5] [3993, 1172, 2538, 4410, 2529]
val_i[:5] [650, 1110, 710, 4058, 3372]
X_train.shape: (2464, 75, 75, 2)
y_train.shape: (2464,)
X_val.shape: (2463, 75, 75, 2)
y_val.shape: (2463,)
np.mean(y_train): 0.42336208196
np.mean(y_val): 0.436865513642
X_train.shape: (17248, 75, 75, 2)
y_train.shape: (17248,)
X_val.shape: (17241, 75, 75, 2)
y_val.shape: (17241,)
Train on 17248 samples, validate on 17241 samples
Epoch 1/200
Epoc

Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200

Epoch 00015: reducing learning rate to 0.000108900003252.
Epoch 16/200
Epoch 17/200

Epoch 00017: reducing learning rate to 3.59369999205e-05.
Epoch 18/200
Epoch 19/200

Epoch 00019: reducing learning rate to 1.18592095896e-05.
Epoch 20/200
Epoch 21/200

Epoch 00021: reducing learning rate to 3.91353921259e-06.
Epoch 22/200
Epoch 00022: early stopping
*** MODEL: 22  ***
train_i[:5] [1102, 2041, 3939, 265, 4222]
val_i[:5] [4675, 901, 1164, 2808, 1511]
X_train.shape: (2464, 75, 75, 2)
y_train.shape: (2464,)
X_val.shape: (2463, 75, 75, 2)
y_val.shape: (2463,)
np.mean(y_train): 0.424308717472
np.mean(y_val): 0.435918493788
X_train.shape: (17248, 75, 75, 2)
y_train.shape: (17248,)
X_val.shape: (17241, 75, 75, 2)
y_val.shape: (17241,)
Train on 17248 samples, validate on 17241 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200

Epoch 00008: reducing learning rate

Epoch 15/200

Epoch 00015: reducing learning rate to 3.59369999205e-05.
Epoch 16/200
Epoch 17/200

Epoch 00017: reducing learning rate to 1.18592095896e-05.
Epoch 18/200
Epoch 19/200

Epoch 00019: reducing learning rate to 3.91353921259e-06.
Epoch 20/200
Epoch 00020: early stopping
*** MODEL: 23  ***
train_i[:5] [2594, 2140, 4103, 793, 4844]
val_i[:5] [3393, 1262, 694, 1143, 4339]
X_train.shape: (2464, 75, 75, 2)
y_train.shape: (2464,)
X_val.shape: (2463, 75, 75, 2)
y_val.shape: (2463,)
np.mean(y_train): 0.427405752963
np.mean(y_val): 0.432820200873
X_train.shape: (17248, 75, 75, 2)
y_train.shape: (17248,)
X_val.shape: (17241, 75, 75, 2)
y_val.shape: (17241,)
Train on 17248 samples, validate on 17241 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200

Epoch 00010: reducing learning rate to 0.000330000015674.
Epoch 11/200
Epoch 12/200

Epoch 00012: reducing learning rate to 0.000108900003252.
Epoch 13/200
Epoc

Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200

Epoch 00011: reducing learning rate to 0.000330000015674.
Epoch 12/200
Epoch 13/200

Epoch 00013: reducing learning rate to 0.000108900003252.
Epoch 14/200
Epoch 15/200

Epoch 00015: reducing learning rate to 3.59369999205e-05.
Epoch 16/200
Epoch 17/200

Epoch 00017: reducing learning rate to 1.18592095896e-05.
Epoch 18/200
Epoch 00018: early stopping
*** MODEL: 25  ***
train_i[:5] [102, 2788, 2760, 1180, 3433]
val_i[:5] [1275, 638, 513, 2240, 3986]
X_train.shape: (2464, 75, 75, 2)
y_train.shape: (2464,)
X_val.shape: (2463, 75, 75, 2)
y_val.shape: (2463,)
np.mean(y_train): 0.424195271449
np.mean(y_val): 0.436031985871
X_train.shape: (17248, 75, 75, 2)
y_train.shape: (17248,)
X_val.shape: (17241, 75, 75, 2)
y_val.shape: (17241,)
Train on 17248 samples, validate on 17241 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200

Epoch 00005: reducing learni

Epoch 11/200

Epoch 00011: reducing learning rate to 0.000108900003252.
Epoch 12/200
Epoch 13/200

Epoch 00013: reducing learning rate to 3.59369999205e-05.
Epoch 14/200
Epoch 15/200

Epoch 00015: reducing learning rate to 1.18592095896e-05.
Epoch 16/200
Epoch 17/200

Epoch 00017: reducing learning rate to 3.91353921259e-06.
Epoch 18/200
Epoch 00018: early stopping
*** MODEL: 26  ***
train_i[:5] [3890, 4650, 3792, 894, 271]
val_i[:5] [1845, 1541, 1942, 2225, 4917]
X_train.shape: (2464, 75, 75, 2)
y_train.shape: (2464,)
X_val.shape: (2463, 75, 75, 2)
y_val.shape: (2463,)
np.mean(y_train): 0.448344310045
np.mean(y_val): 0.41187314255
X_train.shape: (17248, 75, 75, 2)
y_train.shape: (17248,)
X_val.shape: (17241, 75, 75, 2)
y_val.shape: (17241,)
Train on 17248 samples, validate on 17241 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200

Epoch 00005: reducing learning rate to 0.000330000015674.
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200

Epoch 00009: reducing learning

Epoch 19/200
Epoch 20/200

Epoch 00020: reducing learning rate to 1.29146797917e-06.
Epoch 21/200
Epoch 00021: early stopping
*** MODEL: 27  ***
train_i[:5] [408, 1649, 1811, 2669, 4904]
val_i[:5] [4053, 4343, 3463, 3264, 3632]
X_train.shape: (2464, 75, 75, 2)
y_train.shape: (2464,)
X_val.shape: (2463, 75, 75, 2)
y_val.shape: (2463,)
np.mean(y_train): 0.435267229383
np.mean(y_val): 0.424955532623
X_train.shape: (17248, 75, 75, 2)
y_train.shape: (17248,)
X_val.shape: (17241, 75, 75, 2)
y_val.shape: (17241,)
Train on 17248 samples, validate on 17241 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200

Epoch 00011: reducing learning rate to 0.000330000015674.
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200

Epoch 00016: reducing learning rate to 0.000108900003252.
Epoch 17/200
Epoch 18/200

Epoch 00018: reducing learning rate to 3.59369999205e-05.
Epoch 19/200
Epoch 20/200

Epoch 000

X_train.shape: (17248, 75, 75, 2)
y_train.shape: (17248,)
X_val.shape: (17241, 75, 75, 2)
y_val.shape: (17241,)
Train on 17248 samples, validate on 17241 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200

Epoch 00010: reducing learning rate to 0.000330000015674.
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200

Epoch 00014: reducing learning rate to 0.000108900003252.
Epoch 15/200
Epoch 16/200

Epoch 00016: reducing learning rate to 3.59369999205e-05.
Epoch 17/200
Epoch 18/200

Epoch 00018: reducing learning rate to 1.18592095896e-05.
Epoch 19/200
Epoch 20/200

Epoch 00020: reducing learning rate to 3.91353921259e-06.
Epoch 21/200
Epoch 22/200

Epoch 00022: reducing learning rate to 1.29146797917e-06.
Epoch 00022: early stopping
*** MODEL: 29  ***
train_i[:5] [813, 4343, 4107, 2903, 1352]
val_i[:5] [2343, 3806, 3206, 3794, 157]
X_train.shape: (2464, 75, 75, 2)
y_train.shape: (2464,)
X_val.shape: (2463, 75

Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200

Epoch 00008: reducing learning rate to 0.000330000015674.
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200

Epoch 00014: reducing learning rate to 0.000108900003252.
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200

Epoch 00019: reducing learning rate to 3.59369999205e-05.
Epoch 20/200
Epoch 21/200

Epoch 00021: reducing learning rate to 1.18592095896e-05.
Epoch 22/200
Epoch 23/200

Epoch 00023: reducing learning rate to 3.91353921259e-06.
Epoch 24/200
Epoch 25/200

Epoch 00025: reducing learning rate to 1.29146797917e-06.
Epoch 26/200
Epoch 00026: early stopping
*** MODEL: 30  ***
train_i[:5] [737, 2287, 3751, 1251, 2026]
val_i[:5] [3837, 3707, 3241, 4441, 2970]
X_train.shape: (2464, 75, 75, 2)
y_train.shape: (2464,)
X_val.shape: (2463, 75, 75, 2)
y_val.shape: (2463,)
np.mean(y_train): 0.435572854789
np.mean(y_val): 0.42464978313
X_train.shape: (17248, 75, 75, 2)
y_train.shape: (17248,)
X_val.

Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200

Epoch 00011: reducing learning rate to 0.000330000015674.
Epoch 12/200
Epoch 13/200

Epoch 00013: reducing learning rate to 0.000108900003252.
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200

Epoch 00018: reducing learning rate to 3.59369999205e-05.
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200

Epoch 00025: reducing learning rate to 1.18592095896e-05.
Epoch 26/200
Epoch 27/200

Epoch 00027: reducing learning rate to 3.91353921259e-06.
Epoch 28/200
Epoch 29/200

Epoch 00029: reducing learning rate to 1.29146797917e-06.
Epoch 30/200
Epoch 31/200

Epoch 00031: reducing learning rate to 4.26184415119e-07.
Epoch 32/200
Epoch 00032: early stopping


# Predict Test

In [9]:
# Load test data (all rows of test.json, in file order)
df = pd.read_json('./data/test.json')
X_test = []
for im_band1, im_band2 in zip(df['band_1'], df['band_2']):
    # Preprocess: same per-image, per-band standardization as the training
    # data (zero mean, unit standard deviation), via the shared helper.
    im_band1 = normalize(np.array(im_band1).reshape(75, 75, 1))
    im_band2 = normalize(np.array(im_band2).reshape(75, 75, 1))
    X_test.append(np.concatenate([im_band1, im_band2], axis=2))
X_test = np.array(X_test)
print('X_test.shape: {}'.format(X_test.shape))

X_test.shape: (8424, 75, 75, 2)


In [10]:
# Ensemble prediction with test-time augmentation (TTA): every saved model
# predicts every augmented view of the test set, and the weighted predictions
# are averaged over views and models.
y_test_p = 0
# One weight per model. Only the ratios matter, because the sum of the weights
# is divided out at the end.
weights = [0.1] * N_MODELS
for i, w in enumerate(weights):
    print(i)
    # Load the model (checkpoints are numbered from 1)
    MODEL_PATH = './models/model16/model' + str(i + 1) + '.h5'
    model = load_model(MODEL_PATH)
    # predict - tta
    # NOTE(review): func(X_test) is recomputed for every model; caching the
    # augmented views would save time at the cost of holding them in memory.
    for func in aug_funcs:
        y_test_p += model.predict(func(X_test), verbose=1).flatten() * w
# Normalize the accumulated sum into a weighted mean over models and views.
y_test_p = y_test_p / (len(aug_funcs) * sum(weights))

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19


20
21
22
23
24
25
26
27
28
29


In [11]:
# Write the submission: one row per test image, holding its id and the
# averaged ensemble probability.
df_sub = pd.DataFrame(
    {'id': df['id'], 'is_iceberg': y_test_p.flatten()},
    columns=['id', 'is_iceberg'],
)
df_sub.to_csv('./submissions/sub33.csv', index=False)