In [1]:
import numpy as np
import pandas as pd
from skimage import transform
from keras.models import Model, Sequential, load_model
from keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Flatten, GlobalAveragePooling2D, GlobalMaxPooling2D
from keras import losses, optimizers, callbacks
from keras.applications import vgg16, vgg19, inception_v3
import xgboost as xgb
from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import cross_validate, train_test_split
from scipy import fftpack
import matplotlib.pyplot as plt
%matplotlib inline
# Single seed shared by NumPy's global RNG (seeded on the next line) and by the
# train/validation split further down, which passes it as random_state.
# NOTE(review): only NumPy is seeded here; the Keras/TensorFlow backend
# presumably keeps its own RNG state -- confirm if exact reruns are required.
RANDOM_SEED = 43
np.random.seed(RANDOM_SEED)

Using TensorFlow backend.


# Create Dataset

In [2]:
# Train
# Size for an optional upsampling step; nothing below uses it, images stay 75x75.
output_shape = (139, 139)
df = pd.read_json('./data/train.json')
X_train, y_train = [], []
for im_band1, im_band2, label in zip(df['band_1'], df['band_2'], df['is_iceberg']):
    # Each band is stored as a flat list; reshape it to a (75, 75, 1) image.
    # The float dtype is forced so the in-place arithmetic below cannot fail
    # on integer-valued input.
    im_band1 = np.array(im_band1, dtype=np.float64).reshape(75, 75, 1)
    im_band2 = np.array(im_band2, dtype=np.float64).reshape(75, 75, 1)
    # Third channel: per-pixel average of the two raw bands. It must be taken
    # before the bands are standardized in place.
    im_bands_avg = (im_band1 + im_band2) / 2.0
    # Preprocess - zero mean (per image, per channel)
    im_band1 -= np.mean(im_band1)
    im_band2 -= np.mean(im_band2)
    im_bands_avg -= np.mean(im_bands_avg)
    # Preprocess - normalize to unit standard deviation (per image, per channel)
    im_band1 /= np.std(im_band1)
    im_band2 /= np.std(im_band2)
    im_bands_avg /= np.std(im_bands_avg)
    # Concatenate the three channels along the last axis -> (75, 75, 3)
    im = np.concatenate([im_band1, im_band2, im_bands_avg], axis=2)
    X_train.append(im)
    y_train.append(label)
X_train = np.array(X_train)
y_train = np.array(y_train)
print('X_train.shape: %s' % (X_train.shape,))
print('y_train.shape: %s' % (y_train.shape,))

X_train.shape: (1604, 75, 75, 3)
y_train.shape: (1604,)


# Train - Val Split

In [3]:
# Hold out 20% of the samples for validation, using the notebook-wide seed.
# X_train / y_train are rebound to the training portion only.
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train,
    test_size=0.2,
    random_state=RANDOM_SEED,
)
print('X_train.shape: %s' % (X_train.shape,))
print('X_val.shape: %s' % (X_val.shape,))
print('y_train.shape: %s' % (y_train.shape,))
print('y_val.shape: %s' % (y_val.shape,))

X_train.shape: (1283, 75, 75, 3)
X_val.shape: (321, 75, 75, 3)
y_train.shape: (1283,)
y_val.shape: (321,)


# Data Augmentation

In [4]:
def bypass(x):
    """Return ``x`` unchanged; the identity entry of the augmentation list."""
    return x

def h_flip(x):
    """Reverse the third axis (index 2) of ``x`` and leave the others alone.

    ``x`` is indexed with four positions, so it needs at least four axes.
    """
    keep = slice(None)
    reverse = slice(None, None, -1)
    return x[keep, keep, reverse, keep]

def v_flip(x):
    """Reverse the second axis (index 1) of ``x`` and leave the others alone.

    ``x`` is indexed with four positions, so it needs at least four axes.
    """
    keep = slice(None)
    reverse = slice(None, None, -1)
    return x[keep, reverse, keep, keep]

def hv_flip(x):
    """Reverse both the second and third axes (indices 1 and 2) of ``x``.

    This is the vertical flip followed by the horizontal flip, written as a
    single indexing step.
    """
    keep = slice(None)
    reverse = slice(None, None, -1)
    return x[keep, reverse, reverse, keep]

def rot90(x):
    """Apply ``transform.rotate(image, 90)`` to each image in ``x`` and re-stack them.

    Returns a new array whose first axis indexes the rotated images.
    """
    rotated = [transform.rotate(image, 90)[np.newaxis] for image in x]
    return np.concatenate(rotated, axis=0)

def rot180(x):
    """Apply ``transform.rotate(image, 180)`` to each image in ``x`` and re-stack them.

    Returns a new array whose first axis indexes the rotated images.
    """
    rotated = [transform.rotate(image, 180)[np.newaxis] for image in x]
    return np.concatenate(rotated, axis=0)

def rot270(x):
    """Apply ``transform.rotate(image, 270)`` to each image in ``x`` and re-stack them.

    Returns a new array whose first axis indexes the rotated images.
    """
    rotated = [transform.rotate(image, 270)[np.newaxis] for image in x]
    return np.concatenate(rotated, axis=0)

def rot45(x):
    """Rotate each image in ``x`` by 45 degrees with ``mode='reflect'`` and re-stack.

    Returns a new array whose first axis indexes the rotated images.
    """
    rotated = [transform.rotate(image, 45, mode='reflect')[np.newaxis] for image in x]
    return np.concatenate(rotated, axis=0)

def rot135(x):
    """Rotate each image in ``x`` by 135 degrees with ``mode='reflect'`` and re-stack.

    Returns a new array whose first axis indexes the rotated images.
    """
    rotated = [transform.rotate(image, 135, mode='reflect')[np.newaxis] for image in x]
    return np.concatenate(rotated, axis=0)

def rot315(x):
    """Rotate each image in ``x`` by 315 degrees with ``mode='reflect'`` and re-stack.

    Returns a new array whose first axis indexes the rotated images.
    """
    rotated = [transform.rotate(image, 315, mode='reflect')[np.newaxis] for image in x]
    return np.concatenate(rotated, axis=0)

# Transforms applied to whole batches, both when expanding the train/validation
# sets and when averaging test-time predictions.
# NOTE(review): hv_flip and rot180 look like the same 180-degree transform, so
# that view appears to be counted twice -- confirm this is intended.
aug_funcs = [
    bypass,
    h_flip, v_flip, hv_flip,
    rot90, rot180, rot270,
]

In [5]:
# Train: stack every transformed copy of the images along the sample axis.
# Labels are a 1-D vector, repeated once per transform in the same order.
X_train = np.vstack([func(X_train) for func in aug_funcs])
y_train = np.tile(y_train, len(aug_funcs))

# Validation: same expansion, applied after the split.
X_val = np.vstack([func(X_val) for func in aug_funcs])
y_val = np.tile(y_val, len(aug_funcs))

# Report the expanded sizes.
print('X_train.shape: %s' % (X_train.shape,))
print('y_train.shape: %s' % (y_train.shape,))
print('X_val.shape: %s' % (X_val.shape,))
print('y_val.shape: %s' % (y_val.shape,))

X_train.shape: (8981, 75, 75, 3)
y_train.shape: (8981,)
X_val.shape: (2247, 75, 75, 3)
y_val.shape: (2247,)


# Training

In [6]:
# Make model
INPUT_SHAPE = (75, 75, 3)
base_model = vgg16.VGG16(input_shape=INPUT_SHAPE, include_top=False)

# Branch off the base network five layers from its end.
# NOTE(review): presumably this is the last pooling layer before VGG16's final
# convolution block -- confirm against base_model.summary().
x = GlobalAveragePooling2D()(base_model.layers[-5].output)
# Alternatives that were tried for collapsing the feature map:
# x = GlobalMaxPooling2D()(x)
# x = Flatten()(x)

# Small classification head: 128 ReLU units, dropout, one sigmoid output.
x = Dropout(0.5)(Dense(128, activation='relu')(x))
x = Dense(1, activation='sigmoid')(x)
model = Model(inputs=base_model.input, outputs=x)

In [7]:
# -- Top Model Train --
# Freeze every layer of the VGG16 base so only the added head is updated.
for layer in base_model.layers:
    layer.trainable = False

# Compile with binary cross-entropy and Adam at its default settings.
loss = losses.binary_crossentropy
metrics = ['accuracy']
optimizer = optimizers.Adam()
model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

# Fit: the best weights by validation loss are written to MODEL_PATH.
MODEL_PATH = './models/model3.h5'
m_q = 'val_loss'
check_pt = callbacks.ModelCheckpoint(
    filepath=MODEL_PATH, monitor=m_q, save_best_only=True, verbose=1)
early_stop = callbacks.EarlyStopping(monitor=m_q, patience=3, verbose=1)
reduce_lr = callbacks.ReduceLROnPlateau(monitor=m_q, factor=0.33, patience=1, verbose=1)
callback_list = [check_pt, early_stop, reduce_lr]

model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=200,
    callbacks=callback_list,
)

Train on 8981 samples, validate on 2247 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200

Epoch 00007: reducing learning rate to 0.000330000015674.
Epoch 9/200
Epoch 10/200
Epoch 11/200

Epoch 00010: reducing learning rate to 0.000108900003252.
Epoch 12/200
Epoch 13/200
Epoch 14/200

Epoch 00013: reducing learning rate to 3.59369999205e-05.
Epoch 15/200

Epoch 00014: reducing learning rate to 1.18592095896e-05.
Epoch 16/200

Epoch 00015: reducing learning rate to 3.91353921259e-06.
Epoch 00015: early stopping


<keras.callbacks.History at 0x7f9cf692d6d0>

In [8]:
# Load the model saved at MODEL_PATH.
model = load_model(MODEL_PATH)

# -- Entire Model Train --
# Unfreeze every layer; the flags take effect through the compile call below.
for layer in model.layers:
    layer.trainable = True

# Compile with plain SGD (lr=1e-4) for fine-tuning.
loss = losses.binary_crossentropy
metrics = ['accuracy']
optimizer = optimizers.SGD(lr=1e-4)
model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

# Fit
# NOTE(review): this is a new ModelCheckpoint object; presumably it has no record
# of the best val_loss from earlier runs, so an early epoch here may overwrite a
# better file at MODEL_PATH -- confirm.
m_q = 'val_loss'
check_pt = callbacks.ModelCheckpoint(
    filepath=MODEL_PATH, monitor=m_q, save_best_only=True, verbose=1)
early_stop = callbacks.EarlyStopping(monitor=m_q, patience=6, verbose=1)
reduce_lr = callbacks.ReduceLROnPlateau(monitor=m_q, factor=0.5, patience=1, verbose=1)
callback_list = [check_pt, early_stop, reduce_lr]

model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=200,
    callbacks=callback_list,
)

Train on 8981 samples, validate on 2247 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200

Epoch 00007: reducing learning rate to 4.99999987369e-05.
Epoch 9/200

Epoch 00008: reducing learning rate to 2.49999993684e-05.
Epoch 10/200

Epoch 00009: reducing learning rate to 1.24999996842e-05.
Epoch 11/200

Epoch 00010: reducing learning rate to 6.24999984211e-06.
Epoch 12/200

Epoch 00011: reducing learning rate to 3.12499992106e-06.
Epoch 13/200

Epoch 00012: reducing learning rate to 1.56249996053e-06.
Epoch 00012: early stopping


<keras.callbacks.History at 0x7f9c3f745290>

In [9]:
# Compile: continue with the model currently in memory (this cell does not
# reload from MODEL_PATH), using a fresh SGD optimizer at lr=1e-5.
loss = losses.binary_crossentropy
metrics = ['accuracy']
optimizer = optimizers.SGD(lr=1e-5)
model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

# Fit
# NOTE(review): the in-memory weights are presumably those of the last epoch of
# the previous fit, not the best checkpoint -- confirm this is intended.
m_q = 'val_loss'
check_pt = callbacks.ModelCheckpoint(
    filepath=MODEL_PATH, monitor=m_q, save_best_only=True, verbose=1)
early_stop = callbacks.EarlyStopping(monitor=m_q, patience=6, verbose=1)
reduce_lr = callbacks.ReduceLROnPlateau(monitor=m_q, factor=0.5, patience=1, verbose=1)
callback_list = [check_pt, early_stop, reduce_lr]

model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=200,
    callbacks=callback_list,
)

Train on 8981 samples, validate on 2247 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200

Epoch 00004: reducing learning rate to 4.99999987369e-06.
Epoch 6/200

Epoch 00005: reducing learning rate to 2.49999993684e-06.
Epoch 7/200

Epoch 00006: reducing learning rate to 1.24999996842e-06.
Epoch 8/200

Epoch 00007: reducing learning rate to 6.24999984211e-07.
Epoch 9/200

Epoch 00008: reducing learning rate to 3.12499992106e-07.
Epoch 10/200

Epoch 00009: reducing learning rate to 1.56249996053e-07.
Epoch 11/200

Epoch 00010: reducing learning rate to 7.81249980264e-08.
Epoch 12/200

Epoch 00011: reducing learning rate to 3.90624990132e-08.
Epoch 13/200

Epoch 00012: reducing learning rate to 1.95312495066e-08.
Epoch 14/200

Epoch 00013: reducing learning rate to 9.7656247533e-09.
Epoch 00013: early stopping


<keras.callbacks.History at 0x7ff344f6c190>

In [10]:
# Compile: continue with the model currently in memory (no reload in this cell),
# using a fresh SGD optimizer at lr=1e-5.
loss = losses.binary_crossentropy
metrics = ['accuracy']
optimizer = optimizers.SGD(lr=1e-5)
model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

# Fit: checkpoint, early stopping (patience 5) and LR halving all watch val_loss.
m_q = 'val_loss'
check_pt = callbacks.ModelCheckpoint(
    filepath=MODEL_PATH, monitor=m_q, save_best_only=True, verbose=1)
early_stop = callbacks.EarlyStopping(monitor=m_q, patience=5, verbose=1)
reduce_lr = callbacks.ReduceLROnPlateau(monitor=m_q, factor=0.5, patience=1, verbose=1)
callback_list = [check_pt, early_stop, reduce_lr]

model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=200,
    callbacks=callback_list,
)

Train on 8981 samples, validate on 2247 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200

Epoch 00002: reducing learning rate to 4.99999987369e-06.
Epoch 4/200

Epoch 00003: reducing learning rate to 2.49999993684e-06.
Epoch 5/200

Epoch 00004: reducing learning rate to 1.24999996842e-06.
Epoch 6/200

Epoch 00005: reducing learning rate to 6.24999984211e-07.
Epoch 7/200

Epoch 00006: reducing learning rate to 3.12499992106e-07.
Epoch 00006: early stopping


<keras.callbacks.History at 0x7ff34ed32210>

In [11]:
# Compile: continue with the model currently in memory (no reload in this cell),
# using a fresh SGD optimizer at lr=5e-5.
loss = losses.binary_crossentropy
metrics = ['accuracy']
optimizer = optimizers.SGD(lr=5e-5)
model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

# Fit: checkpoint, early stopping (patience 6) and LR halving all watch val_loss.
m_q = 'val_loss'
check_pt = callbacks.ModelCheckpoint(
    filepath=MODEL_PATH, monitor=m_q, save_best_only=True, verbose=1)
early_stop = callbacks.EarlyStopping(monitor=m_q, patience=6, verbose=1)
reduce_lr = callbacks.ReduceLROnPlateau(monitor=m_q, factor=0.5, patience=1, verbose=1)
callback_list = [check_pt, early_stop, reduce_lr]

model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=200,
    callbacks=callback_list,
)

Train on 8981 samples, validate on 2247 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200

Epoch 00003: reducing learning rate to 2.49999993684e-05.
Epoch 5/200
Epoch 6/200
Epoch 7/200

Epoch 00006: reducing learning rate to 1.24999996842e-05.
Epoch 8/200

Epoch 00007: reducing learning rate to 6.24999984211e-06.
Epoch 9/200
Epoch 10/200
Epoch 11/200

Epoch 00010: reducing learning rate to 3.12499992106e-06.
Epoch 12/200

Epoch 00011: reducing learning rate to 1.56249996053e-06.
Epoch 13/200

Epoch 00012: reducing learning rate to 7.81249980264e-07.
Epoch 14/200

Epoch 00013: reducing learning rate to 3.90624990132e-07.
Epoch 15/200

Epoch 00014: reducing learning rate to 1.95312495066e-07.
Epoch 16/200

Epoch 00015: reducing learning rate to 9.7656247533e-08.
Epoch 00015: early stopping


<keras.callbacks.History at 0x7ff315e05d90>

In [12]:
# Load the model saved at MODEL_PATH.
model = load_model(MODEL_PATH)

# Compile with a fresh SGD optimizer at lr=5e-5.
loss = losses.binary_crossentropy
metrics = ['accuracy']
optimizer = optimizers.SGD(lr=5e-5)
model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

# Fit: checkpoint, early stopping (patience 4) and LR halving all watch val_loss.
m_q = 'val_loss'
check_pt = callbacks.ModelCheckpoint(
    filepath=MODEL_PATH, monitor=m_q, save_best_only=True, verbose=1)
early_stop = callbacks.EarlyStopping(monitor=m_q, patience=4, verbose=1)
reduce_lr = callbacks.ReduceLROnPlateau(monitor=m_q, factor=0.5, patience=1, verbose=1)
callback_list = [check_pt, early_stop, reduce_lr]

model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=200,
    callbacks=callback_list,
)

Train on 8981 samples, validate on 2247 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200

Epoch 00002: reducing learning rate to 2.49999993684e-05.
Epoch 4/200

Epoch 00003: reducing learning rate to 1.24999996842e-05.
Epoch 5/200
Epoch 6/200
Epoch 7/200

Epoch 00006: reducing learning rate to 6.24999984211e-06.
Epoch 8/200
Epoch 9/200
Epoch 10/200

Epoch 00009: reducing learning rate to 3.12499992106e-06.
Epoch 11/200

Epoch 00010: reducing learning rate to 1.56249996053e-06.
Epoch 12/200

Epoch 00011: reducing learning rate to 7.81249980264e-07.
Epoch 13/200

Epoch 00012: reducing learning rate to 3.90624990132e-07.
Epoch 00012: early stopping


<keras.callbacks.History at 0x7ff315c6e290>

In [13]:
# Load the model saved at MODEL_PATH.
model = load_model(MODEL_PATH)

# Compile with a fresh SGD optimizer at lr=9e-5.
loss = losses.binary_crossentropy
metrics = ['accuracy']
optimizer = optimizers.SGD(lr=9e-5)
model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

# Fit: checkpoint, early stopping (patience 3) and LR halving all watch val_loss.
m_q = 'val_loss'
check_pt = callbacks.ModelCheckpoint(
    filepath=MODEL_PATH, monitor=m_q, save_best_only=True, verbose=1)
early_stop = callbacks.EarlyStopping(monitor=m_q, patience=3, verbose=1)
reduce_lr = callbacks.ReduceLROnPlateau(monitor=m_q, factor=0.5, patience=1, verbose=1)
callback_list = [check_pt, early_stop, reduce_lr]

model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=200,
    callbacks=callback_list,
)

Train on 8981 samples, validate on 2247 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200

Epoch 00002: reducing learning rate to 4.50000006822e-05.
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200

Epoch 00008: reducing learning rate to 2.25000003411e-05.
Epoch 10/200
Epoch 11/200
Epoch 12/200

Epoch 00011: reducing learning rate to 1.12500001705e-05.
Epoch 13/200

Epoch 00012: reducing learning rate to 5.62500008527e-06.
Epoch 14/200

Epoch 00013: reducing learning rate to 2.81250004264e-06.
Epoch 00013: early stopping


<keras.callbacks.History at 0x7ff314d2ef10>

In [14]:
# Load the model saved at MODEL_PATH.
model = load_model(MODEL_PATH)

# Compile with a fresh SGD optimizer at lr=1e-4.
loss = losses.binary_crossentropy
metrics = ['accuracy']
optimizer = optimizers.SGD(lr=1e-4)
model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

# Fit: checkpoint, early stopping (patience 3) and LR halving all watch val_loss.
m_q = 'val_loss'
check_pt = callbacks.ModelCheckpoint(
    filepath=MODEL_PATH, monitor=m_q, save_best_only=True, verbose=1)
early_stop = callbacks.EarlyStopping(monitor=m_q, patience=3, verbose=1)
reduce_lr = callbacks.ReduceLROnPlateau(monitor=m_q, factor=0.5, patience=1, verbose=1)
callback_list = [check_pt, early_stop, reduce_lr]

model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=200,
    callbacks=callback_list,
)

Train on 8981 samples, validate on 2247 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200

Epoch 00006: reducing learning rate to 4.99999987369e-05.
Epoch 8/200

Epoch 00007: reducing learning rate to 2.49999993684e-05.
Epoch 9/200

Epoch 00008: reducing learning rate to 1.24999996842e-05.
Epoch 00008: early stopping


<keras.callbacks.History at 0x7ff2cbd75450>

In [15]:
# Load the model saved at MODEL_PATH.
model = load_model(MODEL_PATH)

# Compile with a fresh SGD optimizer at lr=1.1e-4.
loss = losses.binary_crossentropy
metrics = ['accuracy']
optimizer = optimizers.SGD(lr=1.1e-4)
model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

# Fit: checkpoint, early stopping (patience 3) and LR halving all watch val_loss.
m_q = 'val_loss'
check_pt = callbacks.ModelCheckpoint(
    filepath=MODEL_PATH, monitor=m_q, save_best_only=True, verbose=1)
early_stop = callbacks.EarlyStopping(monitor=m_q, patience=3, verbose=1)
reduce_lr = callbacks.ReduceLROnPlateau(monitor=m_q, factor=0.5, patience=1, verbose=1)
callback_list = [check_pt, early_stop, reduce_lr]

model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=200,
    callbacks=callback_list,
)

Train on 8981 samples, validate on 2247 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200

Epoch 00006: reducing learning rate to 5.50000004296e-05.
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200

Epoch 00010: reducing learning rate to 2.75000002148e-05.
Epoch 12/200

Epoch 00011: reducing learning rate to 1.37500001074e-05.
Epoch 13/200

Epoch 00012: reducing learning rate to 6.8750000537e-06.
Epoch 14/200

Epoch 00013: reducing learning rate to 3.43750002685e-06.
Epoch 00013: early stopping


<keras.callbacks.History at 0x7ff2cadd7f10>

In [16]:
# Load the model saved at MODEL_PATH.
model = load_model(MODEL_PATH)

# Compile with a fresh SGD optimizer at lr=1e-4.
loss = losses.binary_crossentropy
metrics = ['accuracy']
optimizer = optimizers.SGD(lr=1e-4)
model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

# Fit: checkpoint, early stopping (patience 3) and LR halving all watch val_loss.
m_q = 'val_loss'
check_pt = callbacks.ModelCheckpoint(
    filepath=MODEL_PATH, monitor=m_q, save_best_only=True, verbose=1)
early_stop = callbacks.EarlyStopping(monitor=m_q, patience=3, verbose=1)
reduce_lr = callbacks.ReduceLROnPlateau(monitor=m_q, factor=0.5, patience=1, verbose=1)
callback_list = [check_pt, early_stop, reduce_lr]

model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=200,
    callbacks=callback_list,
)

Train on 8981 samples, validate on 2247 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200

Epoch 00002: reducing learning rate to 4.99999987369e-05.
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200

Epoch 00008: reducing learning rate to 2.49999993684e-05.
Epoch 10/200

Epoch 00009: reducing learning rate to 1.24999996842e-05.
Epoch 11/200

Epoch 00010: reducing learning rate to 6.24999984211e-06.
Epoch 00010: early stopping


<keras.callbacks.History at 0x7ff346fb07d0>

In [17]:
# Load the model saved at MODEL_PATH.
model = load_model(MODEL_PATH)

# Compile with a fresh SGD optimizer at lr=2e-4.
loss = losses.binary_crossentropy
metrics = ['accuracy']
optimizer = optimizers.SGD(lr=2e-4)
model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

# Fit: checkpoint, early stopping (patience 3) and LR halving all watch val_loss.
m_q = 'val_loss'
check_pt = callbacks.ModelCheckpoint(
    filepath=MODEL_PATH, monitor=m_q, save_best_only=True, verbose=1)
early_stop = callbacks.EarlyStopping(monitor=m_q, patience=3, verbose=1)
reduce_lr = callbacks.ReduceLROnPlateau(monitor=m_q, factor=0.5, patience=1, verbose=1)
callback_list = [check_pt, early_stop, reduce_lr]

model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=200,
    callbacks=callback_list,
)

Train on 8981 samples, validate on 2247 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200

Epoch 00004: reducing learning rate to 9.99999974738e-05.
Epoch 6/200
Epoch 7/200
Epoch 8/200

Epoch 00007: reducing learning rate to 4.99999987369e-05.
Epoch 9/200

Epoch 00008: reducing learning rate to 2.49999993684e-05.
Epoch 10/200

Epoch 00009: reducing learning rate to 1.24999996842e-05.
Epoch 00009: early stopping


<keras.callbacks.History at 0x7ff2c9d85090>

In [18]:
# Load the model saved at MODEL_PATH.
model = load_model(MODEL_PATH)

# Compile with a fresh SGD optimizer at lr=1e-4.
loss = losses.binary_crossentropy
metrics = ['accuracy']
optimizer = optimizers.SGD(lr=1e-4)
model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

# Fit: checkpoint, early stopping (patience 3) and LR halving all watch val_loss.
m_q = 'val_loss'
check_pt = callbacks.ModelCheckpoint(
    filepath=MODEL_PATH, monitor=m_q, save_best_only=True, verbose=1)
early_stop = callbacks.EarlyStopping(monitor=m_q, patience=3, verbose=1)
reduce_lr = callbacks.ReduceLROnPlateau(monitor=m_q, factor=0.5, patience=1, verbose=1)
callback_list = [check_pt, early_stop, reduce_lr]

model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=200,
    callbacks=callback_list,
)

Train on 8981 samples, validate on 2247 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200

Epoch 00003: reducing learning rate to 4.99999987369e-05.
Epoch 5/200
Epoch 6/200
Epoch 7/200

Epoch 00006: reducing learning rate to 2.49999993684e-05.
Epoch 8/200

Epoch 00007: reducing learning rate to 1.24999996842e-05.
Epoch 9/200

Epoch 00008: reducing learning rate to 6.24999984211e-06.
Epoch 00008: early stopping


<keras.callbacks.History at 0x7ff2c91dc910>

In [19]:
# Load the model saved at MODEL_PATH.
model = load_model(MODEL_PATH)

# Compile with a fresh SGD optimizer at lr=1e-4.
loss = losses.binary_crossentropy
metrics = ['accuracy']
optimizer = optimizers.SGD(lr=1e-4)
model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

# Fit: checkpoint, early stopping (patience 3) and LR halving all watch val_loss.
m_q = 'val_loss'
check_pt = callbacks.ModelCheckpoint(
    filepath=MODEL_PATH, monitor=m_q, save_best_only=True, verbose=1)
early_stop = callbacks.EarlyStopping(monitor=m_q, patience=3, verbose=1)
reduce_lr = callbacks.ReduceLROnPlateau(monitor=m_q, factor=0.5, patience=1, verbose=1)
callback_list = [check_pt, early_stop, reduce_lr]

model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=200,
    callbacks=callback_list,
)

Train on 8981 samples, validate on 2247 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200

Epoch 00009: reducing learning rate to 4.99999987369e-05.
Epoch 11/200
Epoch 12/200
Epoch 13/200

Epoch 00012: reducing learning rate to 2.49999993684e-05.
Epoch 14/200

Epoch 00013: reducing learning rate to 1.24999996842e-05.
Epoch 15/200
Epoch 16/200
Epoch 17/200

Epoch 00016: reducing learning rate to 6.24999984211e-06.
Epoch 18/200

Epoch 00017: reducing learning rate to 3.12499992106e-06.
Epoch 19/200

Epoch 00018: reducing learning rate to 1.56249996053e-06.
Epoch 00018: early stopping


<keras.callbacks.History at 0x7ff2c8751f90>

In [20]:
# Load the model saved at MODEL_PATH.
model = load_model(MODEL_PATH)

# Compile with a fresh SGD optimizer at lr=1e-4.
loss = losses.binary_crossentropy
metrics = ['accuracy']
optimizer = optimizers.SGD(lr=1e-4)
model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

# Fit: checkpoint, early stopping (patience 3) and LR halving all watch val_loss.
m_q = 'val_loss'
check_pt = callbacks.ModelCheckpoint(
    filepath=MODEL_PATH, monitor=m_q, save_best_only=True, verbose=1)
early_stop = callbacks.EarlyStopping(monitor=m_q, patience=3, verbose=1)
reduce_lr = callbacks.ReduceLROnPlateau(monitor=m_q, factor=0.5, patience=1, verbose=1)
callback_list = [check_pt, early_stop, reduce_lr]

model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=200,
    callbacks=callback_list,
)

Train on 8981 samples, validate on 2247 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200

Epoch 00004: reducing learning rate to 4.99999987369e-05.
Epoch 6/200

Epoch 00005: reducing learning rate to 2.49999993684e-05.
Epoch 7/200

Epoch 00006: reducing learning rate to 1.24999996842e-05.
Epoch 00006: early stopping


<keras.callbacks.History at 0x7ff2ade0e910>

In [21]:
# Load the model from the hard-coded "(copy)" file rather than from MODEL_PATH.
model = load_model('./models/model3 (copy).h5')

# Compile with a fresh SGD optimizer at lr=1e-4.
loss = losses.binary_crossentropy
metrics = ['accuracy']
optimizer = optimizers.SGD(lr=1e-4)
model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

# Fit: improved weights are still written to MODEL_PATH, not back to the copy.
# NOTE(review): this is a new ModelCheckpoint object; presumably it has no record
# of the val_loss of the file already at MODEL_PATH and may overwrite it -- confirm.
m_q = 'val_loss'
check_pt = callbacks.ModelCheckpoint(
    filepath=MODEL_PATH, monitor=m_q, save_best_only=True, verbose=1)
early_stop = callbacks.EarlyStopping(monitor=m_q, patience=3, verbose=1)
reduce_lr = callbacks.ReduceLROnPlateau(monitor=m_q, factor=0.5, patience=1, verbose=1)
callback_list = [check_pt, early_stop, reduce_lr]

model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=200,
    callbacks=callback_list,
)

Train on 8981 samples, validate on 2247 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200

Epoch 00005: reducing learning rate to 4.99999987369e-05.
Epoch 7/200

Epoch 00006: reducing learning rate to 2.49999993684e-05.
Epoch 8/200

Epoch 00007: reducing learning rate to 1.24999996842e-05.
Epoch 00007: early stopping


<keras.callbacks.History at 0x7ff2ad199d10>

# Predict Test

In [26]:
# Load test data
df = pd.read_json('./data/test.json')
# The loop below reads no labels, so y_test stays an empty list.
X_test, y_test = [], []
for im_band1, im_band2 in zip(df['band_1'], df['band_2']):
    # Each band is stored as a flat list; reshape it to a (75, 75, 1) image.
    # The float dtype is forced so the in-place arithmetic below cannot fail
    # on integer-valued input.
    im_band1 = np.array(im_band1, dtype=np.float64).reshape(75, 75, 1)
    im_band2 = np.array(im_band2, dtype=np.float64).reshape(75, 75, 1)
    # Third channel: per-pixel average of the two raw bands. It must be taken
    # before the bands are standardized in place.
    im_bands_avg = (im_band1 + im_band2) / 2.0
    # Preprocess - zero mean. The training images in this notebook are centred
    # per channel before scaling; without the same step here the model would be
    # fed inputs shifted away from what it was trained on.
    im_band1 -= np.mean(im_band1)
    im_band2 -= np.mean(im_band2)
    im_bands_avg -= np.mean(im_bands_avg)
    # Preprocess - normalize to unit standard deviation (per image, per channel)
    im_band1 /= np.std(im_band1)
    im_band2 /= np.std(im_band2)
    im_bands_avg /= np.std(im_bands_avg)
    # Concatenate the three channels along the last axis -> (75, 75, 3)
    im = np.concatenate([im_band1, im_band2, im_bands_avg], axis=2)
    X_test.append(im)
X_test = np.array(X_test)
print('X_test.shape: %s' % (X_test.shape,))

X_test.shape: (8424, 75, 75, 3)


In [27]:
# Load the model
# NOTE: this rebinds MODEL_PATH to the "(copy)" checkpoint file, so any cell run
# after this one that uses MODEL_PATH will read/write that file instead.
MODEL_PATH = './models/model3 (copy).h5'
model = load_model(MODEL_PATH)

In [28]:
# predict - tta
# Run the model on every transformed copy of the test set, add up the flattened
# predictions in aug_funcs order, and divide by the number of transforms.
y_test_p = sum(
    model.predict(func(X_test), verbose=1).flatten() for func in aug_funcs
) / len(aug_funcs)



In [29]:
# Build the two-column submission table (ids from the test frame, averaged
# probabilities as a flat vector) and write it without the index column.
df_sub = pd.DataFrame(
    {'id': df['id'], 'is_iceberg': y_test_p.flatten()},
    columns=['id', 'is_iceberg'],
)
df_sub.to_csv('./submissions/sub9.csv', index=False)