Ideas:

* Add FFT channels to CNN (Tried, does not make much difference)
* Finetune CNN (with SGD slow learning rate)
* 5-fold CNN
* Extract Features from CNN (before FC) and do XGB
* TTA (tried, made it better)
* More augmenting, additional 45, 135, 315 degrees
* More augmenting, random rotations and flips
* Predict test data and train with test
* Train on all of the training data (no train-val split)
* Try a different way of combining the predictions
* Fine-tune on pre-trained models (Get rid of some top layers because input size is small)

This notebook uses no augmentation and does not add test data (by margin) to the training set.

* [Create Dataset](#Create-Dataset)
* [Train - Val Split](#Train---Val-Split)
* [Data Augmentation](#Data-Augmentation)
* [Training](#Training)
* [Predict Test](#Predict-Test)

In [1]:
import numpy as np
import pandas as pd
from skimage import transform
from keras import layers
from keras.models import Sequential, load_model, Model
from keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Flatten, LeakyReLU, concatenate
from keras import losses, optimizers, callbacks
from keras import regularizers
import xgboost as xgb
from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline
from scipy.signal import fftconvolve
from sklearn.model_selection import cross_validate, train_test_split, StratifiedKFold, KFold
from scipy import fftpack
import matplotlib.pyplot as plt
%matplotlib inline
# Single seed for the notebook: it seeds NumPy's global RNG here and is reused
# as the random_state of the cross-validation splitter further down.
RANDOM_SEED = 43
np.random.seed(seed=RANDOM_SEED)

Using TensorFlow backend.


In [2]:
def bypass(x):
    """Identity transform: hand ``x`` back untouched (the "no augmentation" entry)."""
    unchanged = x
    return unchanged

def h_flip(x):
    """Reverse the third axis (index 2) of the 4-D batch ``x``; other axes are kept."""
    keep = slice(None)
    reverse = slice(None, None, -1)
    return x[keep, keep, reverse, keep]

def v_flip(x):
    """Reverse the second axis (index 1) of the 4-D batch ``x``; other axes are kept."""
    keep = slice(None)
    reverse = slice(None, None, -1)
    return x[keep, reverse, keep, keep]

def hv_flip(x):
    """Apply ``v_flip`` and then ``h_flip`` to the batch ``x``."""
    vertically_flipped = v_flip(x)
    return h_flip(vertically_flipped)

def _rotate_batch(x, angle, **rotate_kwargs):
    """Rotate every image of the batch ``x`` by ``angle`` degrees.

    Each element obtained by iterating over ``x`` is passed to
    ``transform.rotate`` (extra keyword arguments are forwarded unchanged) and
    the rotated images are joined again along a new leading axis.
    """
    rotated = [transform.rotate(im, angle, **rotate_kwargs)[np.newaxis] for im in x]
    return np.concatenate(rotated, axis=0)

def rot90(x):
    """Rotate each image in ``x`` by 90 degrees (``transform.rotate`` defaults)."""
    return _rotate_batch(x, 90)

def rot180(x):
    """Rotate each image in ``x`` by 180 degrees (``transform.rotate`` defaults)."""
    return _rotate_batch(x, 180)

def rot270(x):
    """Rotate each image in ``x`` by 270 degrees (``transform.rotate`` defaults)."""
    return _rotate_batch(x, 270)

def rot45(x):
    """Rotate each image in ``x`` by 45 degrees using ``mode='reflect'``."""
    return _rotate_batch(x, 45, mode='reflect')

def rot135(x):
    """Rotate each image in ``x`` by 135 degrees using ``mode='reflect'``."""
    return _rotate_batch(x, 135, mode='reflect')

def rot315(x):
    """Rotate each image in ``x`` by 315 degrees using ``mode='reflect'``."""
    return _rotate_batch(x, 315, mode='reflect')

# Full augmentation / TTA set: identity, the three flips and the right-angle
# rotations. Kept under its own name so it can be switched on explicitly.
ALL_AUG_FUNCS = [bypass,
                 h_flip, v_flip, hv_flip,
                 rot90, rot180, rot270]

# Active set for this notebook: identity only, i.e. no augmentation.
# Use `aug_funcs = ALL_AUG_FUNCS` to enable the full set.
aug_funcs = [bypass]

# Create Dataset 

In [3]:
# Train
df_train = pd.read_json('./data/train-angle-filled.json')
df = df_train
print len(df)

1604


In [4]:
# Preview only the first rows: each band cell holds a 75*75-element list, so
# rendering the complete frame is slow and bloats the saved notebook.
df.head(10)

Unnamed: 0,band_1,band_2,id,inc_angle,is_iceberg
0,"[-27.878361, -27.15416, -28.668615, -29.537971...","[-27.154118, -29.537888, -31.0306, -32.190483,...",dfd5f913,43.923900,0
1,"[-12.242375, -14.920305, -14.920363, -12.66633...","[-31.506321, -27.984554, -26.645678, -23.76760...",e25388fd,38.156200,0
10,"[-21.397552, -19.753859, -23.426783, -24.65221...","[-26.72291, -27.418192, -27.787899, -25.774536...",3aac67cd,44.624000,1
100,"[-20.04884, -19.469616, -20.510244, -19.61095,...","[-29.742329, -26.374287, -25.490265, -25.49031...",66348d03,41.134200,0
1000,"[-23.199345, -23.603487, -25.965549, -27.12546...","[-23.004148, -24.942425, -24.472878, -23.00437...",7052a617,33.897500,0
1001,"[-22.34741, -22.156555, -25.308764, -24.530453...","[-24.782082, -24.047678, -24.782185, -27.45301...",3062fca8,39.962700,1
1002,"[-20.845585, -17.811007, -20.689199, -21.84909...","[-26.110413, -25.549898, -25.549961, -26.70986...",4ea48c18,37.326000,0
1003,"[-25.098461, -25.098461, -24.320147, -21.05014...","[-29.62639, -29.62639, -28.757122, -29.180954,...",b7519a52,42.559000,1
1004,"[-25.847187, -20.741787, -19.826689, -18.99888...","[-25.562378, -23.348463, -26.76244, -30.780788...",ed4a2968,40.395800,1
1005,"[-19.860071, -19.443127, -18.789801, -19.44324...","[-29.12228, -26.939449, -27.267315, -29.54624,...",5d58d936,38.853700,1


In [5]:
def normalize(im):
    """Return a copy of ``im`` shifted to zero mean and scaled by its standard deviation.

    The input array is not modified.
    """
    centered = im - np.mean(im)
    return centered / np.std(centered)

def get_convolve(im1, im2):
    """Return the normalized FFT convolution of ``im1`` with the flipped ``im2``.

    Both inputs are mean-centred first. ``im2`` is reversed along its first two
    axes before the 'same'-mode convolution, which makes the result a
    cross-correlation of the two centred images. The output goes through
    ``normalize``.
    """
    centered_first = im1 - np.mean(im1)
    centered_second = im2 - np.mean(im2)
    flipped_second = centered_second[::-1, ::-1]
    correlation = fftconvolve(centered_first, flipped_second, mode='same')
    return normalize(correlation)

In [6]:
X, y = [], []
for im_band1, im_band2, label in zip(df['band_1'], df['band_2'], df['is_iceberg']):
    im_band1 = np.array(im_band1).reshape(75, 75, 1)
    im_band2 = np.array(im_band2).reshape(75, 75, 1)    
    # Preprocess
    # - Zero mean
    im_band1 -= np.mean(im_band1)
    im_band2 -= np.mean(im_band2)
    # - Normalize
    im_band1 /= np.std(im_band1)
    im_band2 /= np.std(im_band2)    
    im = np.concatenate([im_band1, im_band2], axis=2)
#     im = np.concatenate([normalize(im_band1), normalize(im_band2), get_convolve(im_band1, im_band2)], axis=2)
    X.append(im)
    y.append(label)    
X = np.array(X)
X_inc_angle = np.array(df['inc_angle'])
y = np.array(y)
print 'X.shape:', X.shape
print 'X_inc_angle.shape', X_inc_angle.shape
print 'y.shape:', y.shape

X.shape: (1604, 75, 75, 2)
X_inc_angle.shape (1604,)
y.shape: (1604,)


# Train - Val Split

In [7]:
# Cross-validation setup: N_SPLITS stratified, shuffled folds with a fixed seed.
# MODEL_NUMBER selects which fold this run of the notebook trains on.
N_SPLITS = 5
MODEL_NUMBER = 1
skf = StratifiedKFold(
    n_splits=N_SPLITS,
    shuffle=True,
    random_state=RANDOM_SEED,
)
# Stratify on the boolean class label; materialise the (train, val) index pairs.
cv = list(skf.split(X, y > 0.5))

In [8]:
train_i, val_i = cv[MODEL_NUMBER - 1]
X_train, X_train_inc_angle, y_train = X[train_i], X_inc_angle[train_i], y[train_i]
X_val, X_val_inc_angle, y_val = X[val_i], X_inc_angle[val_i], y[val_i]
print 'X_train.shape:', X_train.shape
print 'X_train_inc_angle.shape:', X_train_inc_angle.shape
print 'y_train.shape:', y_train.shape
print 'X_val.shape:', X_val.shape
print 'X_val_inc_angle.shape:', X_val_inc_angle.shape
print 'y_val.shape:', y_val.shape
print 'np.mean(y_train):', np.mean(y_train)
print 'np.mean(y_val):', np.mean(y_val)

X_train.shape: (1282, 75, 75, 2)
X_train_inc_angle.shape: (1282,)
y_train.shape: (1282,)
X_val.shape: (322, 75, 75, 2)
X_val_inc_angle.shape: (322,)
y_val.shape: (322,)
np.mean(y_train): 0.469578783151
np.mean(y_val): 0.468944099379


# Data Augmentation

In [9]:
# Train
X_train = np.concatenate([func(X_train) for func in aug_funcs], axis=0)
y_train = np.concatenate([y_train] * len(aug_funcs))
X_train_inc_angle = np.array(list(X_train_inc_angle) * len(aug_funcs))

# Validation
X_val = np.concatenate([func(X_val) for func in aug_funcs], axis=0)
y_val = np.concatenate([y_val] * len(aug_funcs))
X_val_inc_angle = np.array(list(X_val_inc_angle) * len(aug_funcs))

# 
print 'X_train.shape:', X_train.shape
print 'X_train_inc_angle.shape:', X_train_inc_angle.shape
print 'y_train.shape:', y_train.shape
print 'X_val.shape:', X_val.shape
print 'X_val_inc_angle.shape', X_val_inc_angle.shape
print 'y_val.shape:', y_val.shape

X_train.shape: (1282, 75, 75, 2)
X_train_inc_angle.shape: (1282,)
y_train.shape: (1282,)
X_val.shape: (322, 75, 75, 2)
X_val_inc_angle.shape (322,)
y_val.shape: (322,)


# Training

In [10]:
def get_model(input_shape):
    """Build and compile the two-input CNN classifier.

    Parameters
    ----------
    input_shape : tuple
        Shape handed to the image ``Input`` layer.

    Returns
    -------
    A compiled Keras ``Model`` whose inputs are ``[image, inc_angle]`` (the
    second one a single scalar per sample) and whose output is one sigmoid
    unit. It is compiled with binary cross-entropy, a default ``Adam``
    optimizer and the accuracy metric.
    """
    image_in = layers.Input(shape=input_shape)

    # Block 1: convolution followed by dropout only (no pooling).
    net = layers.Conv2D(32, kernel_size=(3, 3), activation='relu')(image_in)
    net = layers.Dropout(0.25)(net)

    # Blocks 2 and 3: conv (relu) -> 2x2 max-pool -> dropout, doubling the filters.
    for n_filters in (64, 128):
        net = layers.Conv2D(n_filters, kernel_size=(3, 3), activation='relu')(net)
        net = layers.MaxPooling2D(pool_size=(2, 2))(net)
        net = layers.Dropout(0.25)(net)

    # Block 4: linear conv with a separate LeakyReLU, pooling and heavier dropout.
    net = layers.Conv2D(256, kernel_size=(3, 3))(net)
    net = layers.LeakyReLU()(net)
    net = layers.MaxPooling2D(pool_size=(2, 2))(net)
    net = layers.Dropout(0.5)(net)

    # Fully connected head: the flattened features are joined with the
    # incidence angle, which enters the network through its own input.
    net = layers.Flatten()(net)
    angle_in = layers.Input(shape=(1,))
    net = layers.concatenate([net, angle_in])
    net = layers.Dense(32)(net)
    net = layers.LeakyReLU()(net)
    net = layers.Dropout(0.5)(net)
    probability = layers.Dense(1, activation='sigmoid')(net)

    # Assemble and compile.
    model = Model(inputs=[image_in, angle_in], outputs=probability)
    model.compile(loss=losses.binary_crossentropy,
                  optimizer=optimizers.Adam(),
                  metrics=['accuracy'])
    return model

In [11]:
# Fresh network for 75x75 images with 2 channels (band_1, band_2).
model = get_model((75, 75, 2))

In [12]:
# Callbacks
def get_lr(epoch):
    lr = (np.random.rand() * 4e-2 + 1e-7)
    lr = np.clip(lr, a_min=None, a_max=0.025)
    print 'lr:', lr
    return lr
MODEL_PATH = './models/model20/model' + str(MODEL_NUMBER) + '.h5'
m_q = 'val_loss'
model_path = MODEL_PATH
check_pt = callbacks.ModelCheckpoint(filepath=model_path, monitor=m_q, save_best_only=True, verbose=1)
early_stop = callbacks.EarlyStopping(patience=10, monitor=m_q, verbose=1)
reduce_lr = callbacks.ReduceLROnPlateau(patience=3, factor=0.33, monitor=m_q, verbose=1)
schedule_lr = callbacks.LearningRateScheduler(get_lr)
callback_list = [check_pt, early_stop, reduce_lr]

model.fit([X_train, X_train_inc_angle], y_train, validation_data=([X_val, X_val_inc_angle], y_val), callbacks=callback_list, epochs=200, batch_size=32)

Train on 1282 samples, validate on 322 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200

Epoch 00009: reducing learning rate to 0.000330000015674.
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200

Epoch 00015: reducing learning rate to 0.000108900003252.
Epoch 16/200
Epoch 17/200
Epoch 18/200

Epoch 00018: reducing learning rate to 3.59369999205e-05.
Epoch 19/200
Epoch 20/200
Epoch 21/200

Epoch 00021: reducing learning rate to 1.18592095896e-05.
Epoch 00021: early stopping


<keras.callbacks.History at 0x7fbde5065650>

# Predict Test

In [13]:
# Load test data
df = pd.read_json('./data/train.json')
X_test, y_test = [], []
for im_band1, im_band2 in zip(df['band_1'], df['band_2']):
    im_band1 = np.array(im_band1).reshape(75, 75, 1)
    im_band2 = np.array(im_band2).reshape(75, 75, 1)    
    # Preprocess - zero mean
    im_band1 -= np.mean(im_band1)
    im_band2 -= np.mean(im_band2)
    # Preprocess - normalize
    im_band1 /= np.std(im_band1)
    im_band2 /= np.std(im_band2)    
    im = np.concatenate([im_band1, im_band2], axis=2)
    X_test.append(im)    
X_test = np.array(X_test)
X_test_inc_angle = np.array(df['inc_angle'])
print 'X_test.shape:', X_test.shape
print 'X_test_inc_angle.shape:', X_test_inc_angle.shape

X_test.shape: (1604, 75, 75, 2)
X_test_inc_angle.shape: (1604,)


In [14]:
y_test_p = 0
# weights = [0.25, 0.4 / 3, 0.35, 0.4 / 3, 0.4 / 3]
# weights = [0.2, 0.18, 0.2, 0.2, 0.22]
weights = [0.2] * 5
for i, w in zip(range(5), weights):
    print i
    # Load the model
    MODEL_PATH = './models/model20/model' + str(i + 1) + '.h5'
    model = load_model(MODEL_PATH)
    # predict - tta    
    for func in aug_funcs:
        y_test_p += model.predict([func(X_test), X_test_inc_angle], verbose=1).flatten() * w
# y_test_p = y_test_p / (len(aug_funcs) * 5.0)
y_test_p = y_test_p / (len(aug_funcs) * sum(weights))

0
  32/1604 [..............................] - ETA: 2s

ValueError: could not convert string to float: na

In [None]:
# Pair every id with its averaged prediction and write the table to CSV
# without the index column.
df_sub = pd.DataFrame({'id': df['id'], 'is_iceberg': y_test_p.flatten()},
                      columns=['id', 'is_iceberg'])
df_sub.to_csv('./submissions/model20-train-predictions.csv', index=False)