# Sequential model using Keras library

Following instructions from here: https://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html

And using code from here: https://gist.github.com/fchollet/f35fbc80e066a49d65f1688a7e99f069

The eventual fully connected layers will be based on this: https://www.kaggle.com/devm2024/transfer-learning-with-vgg-16-convnet-lb-0-1850/notebook

# Create a new model

Create a new model that does not rely on the ImageNet database, since this data is not similar to ImageNet images.

#### Steps:
1. import data
2. separate training and validation sets
3. get data and labels for both sets
4. create simple model to start understanding data


#### December 13 Steps:
1. test linear images with CNN model
2. test data augmentation at 5 different levels (Rotation, zoom, horizontal_flip, vertical_flip)
3. create larger dataset by running data augmentation multiple times

In [15]:
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dropout, Flatten, Dense, Lambda, Conv2D, MaxPooling2D, BatchNormalization
from keras import applications
import os.path
import pandas as pd
import random
from keras.utils import to_categorical
%matplotlib inline
import matplotlib.pyplot as plt

In [3]:
# Root of the Statoil data directory; the train/validate/results/models paths
# used by later cells are all built by appending to this string.
# Alternative location, kept for reference:
#path = '/home/ubuntu/courses/deeplearning1/nbs/data/statoil/'
path = '/Users/ilanrotenberg/projects/courses/deeplearning1/nbs/data/statoil/'

In [4]:
# define variables

# dimensions of our images.
img_width = 75
img_height = 75


def _count_files(directory):
    """Return how many entries directly inside *directory* are regular files."""
    return sum(os.path.isfile(os.path.join(directory, entry))
               for entry in os.listdir(directory))


# Locations derived from the data root.
top_model_weights_path = path + 'results/bottleneck_fc_model.h5'
train_data_dir = path + 'train/'
validate_data_dir = path + 'validate/'

# Sample counts are the number of files in the two class folders of each split.
nb_train_samples = (_count_files(train_data_dir + 'iceberg/')
                    + _count_files(train_data_dir + 'ship/'))
nb_validate_samples = (_count_files(validate_data_dir + 'iceberg/')
                       + _count_files(validate_data_dir + 'ship/'))

epochs = 50
batch_size = 16

## If data has already been split into train and validate sets - skip to the next markdown instruction

In [None]:
# Read the original competition files from the data root.
train_path, test_path = path + '/train.json', path + '/test.json'
train_batch, test_batch = pd.read_json(train_path), pd.read_json(test_path)

In [None]:
train_batch[:5]

In [None]:
# Hold out 30% of the rows for validation. The seed is fixed so that re-running
# this cell reproduces the same split instead of drawing a new random one.
validate_batch = train_batch.sample(frac=0.3, random_state=42)

In [None]:
validate_batch[:5]

In [None]:
# Keep only the rows whose index label was not drawn into the validation sample,
# then preview the first five of them.
in_validation = train_batch.index.isin(validate_batch.index)
new_train_batch = train_batch.loc[~in_validation]
new_train_batch.head(5)

In [None]:
# Write both halves of the split as JSON one level above train_data_dir
# (the '../' steps out of the train/ folder).
validate_batch.to_json(path_or_buf = train_data_dir+'../validate.json')
new_train_batch.to_json(path_or_buf = train_data_dir+'../train_new.json')

## Skip here if data has already been split into training and validation sets

In [5]:
# Image side lengths used when reshaping the flattened bands.
img_width, img_height = 75, 75

# Locations of the previously saved split and of the competition test set.
train_path = path + '/train_new.json'
validate_path = path + '/validate.json'
test_path = path + '/test.json'

# Load the three frames in the same order as the paths above.
train_batch, validate_batch, test_batch = map(
    pd.read_json, (train_path, validate_path, test_path))

In [6]:
train_batch[:5]

Unnamed: 0,band_1,band_2,id,inc_angle,is_iceberg
0,"[-27.878361, -27.15416, -28.668615, -29.537971...","[-27.154118, -29.537888, -31.0306, -32.190483,...",dfd5f913,43.9239,0
1,"[-12.242375, -14.920305, -14.920363, -12.66633...","[-31.506321, -27.984554, -26.645678, -23.76760...",e25388fd,38.1562,0
10,"[-21.397552, -19.753859, -23.426783, -24.65221...","[-26.72291, -27.418192, -27.787899, -25.774536...",3aac67cd,44.624,1
100,"[-20.04884, -19.469616, -20.510244, -19.61095,...","[-29.742329, -26.374287, -25.490265, -25.49031...",66348d03,41.1342,0
1001,"[-22.34741, -22.156555, -25.308764, -24.530453...","[-24.782082, -24.047678, -24.782185, -27.45301...",3062fca8,39.9627,1


In [7]:
# Missing incidence angles are stored as the string 'na'. pd.to_numeric with
# errors='coerce' turns them into NaN in one vectorised pass and leaves the
# column with a float dtype.
train_batch.inc_angle = pd.to_numeric(train_batch.inc_angle, errors='coerce')

In [8]:
# Missing incidence angles are stored as the string 'na'. pd.to_numeric with
# errors='coerce' turns them into NaN in one vectorised pass and leaves the
# column with a float dtype.
validate_batch.inc_angle = pd.to_numeric(validate_batch.inc_angle, errors='coerce')

In [None]:
# Second, untouched copy of the training split loaded from train_path.
# NOTE(review): train_data is not referenced by any other visible cell.
train_data = pd.read_json(train_path)

In [None]:
train_batch[:5]

In [None]:
validate_batch[:5]

In [1]:
def db_to_linear(band):
    """Convert decibel values to linear scale, i.e. 10 ** (band / 10).

    Args:
        band: A scalar or array-like of decibel values.

    Returns:
        A NumPy value with the same shape as *band* holding the linear values.
    """
    decibels = np.asarray(band)
    exponent = decibels / 10
    return np.power(10, exponent)

In [11]:
# Convert band 1 of the training images from decibels to linear scale. The
# result is bound to train_band_1_lin, the name the plotting cell below reads
# (the previous name, train_batch_band_1_lin, was never used anywhere).
train_band_1_lin = db_to_linear(train_band_1)

In [8]:
# Show image 3 of the linear-scale band 1 stack with the "jet" colour map.
# NOTE(review): plt.imshow presumably returns the image artist rather than a
# Figure, so the name `fig` may be misleading — confirm before relying on it.
fig = plt.imshow(train_band_1_lin[3], cmap="jet")
fig

NameError: name 'plt' is not defined

In [10]:
def _to_image_stack(flat_bands):
    """Turn a column of flattened bands into one float32 array of 2-D images."""
    return np.array([
        np.array(flat).astype('float32').reshape(img_width, img_height)
        for flat in flat_bands
    ])


# Both radar bands for each of the three data sets.
train_band_1 = _to_image_stack(train_batch.band_1)
train_band_2 = _to_image_stack(train_batch.band_2)

validate_band_1 = _to_image_stack(validate_batch.band_1)
validate_band_2 = _to_image_stack(validate_batch.band_2)

test_band_1 = _to_image_stack(test_batch.band_1)
test_band_2 = _to_image_stack(test_batch.band_2)


In [None]:
# Convert every dB-scaled band stack to linear scale.
train_band_1_lin = db_to_linear(train_band_1)
train_band_2_lin = db_to_linear(train_band_2)

validate_band_1_lin = db_to_linear(validate_band_1)
validate_band_2_lin = db_to_linear(validate_band_2)

# The test stacks are converted from the dB arrays (test_band_1 / test_band_2);
# the *_lin names do not exist until this cell has run, so they cannot be the
# input of their own conversion.
test_band_1_lin = db_to_linear(test_band_1)
test_band_2_lin = db_to_linear(test_band_2)

In [None]:
# Third channel (linear scale): element-wise mean of band 1 and band 2.
train_band_3_lin = np.add(train_band_1_lin, train_band_2_lin) / 2
validate_band_3_lin = np.add(validate_band_1_lin, validate_band_2_lin) / 2
test_band_3_lin = np.add(test_band_1_lin, test_band_2_lin) / 2

In [7]:

# Third channel (dB scale): element-wise mean of band 1 and band 2.
train_band_3 = np.add(train_band_1, train_band_2) / 2
validate_band_3 = np.add(validate_band_1, validate_band_2) / 2
test_band_3 = np.add(test_band_1, test_band_2) / 2

In [8]:
# Assemble three-channel images. np.stack appends the channel axis last, so the
# arrays are indexed as (sample, row, column, channel) at this point.
X_train = np.stack((train_band_1, train_band_2, train_band_3), axis=-1)
X_validate = np.stack((validate_band_1, validate_band_2, validate_band_3), axis=-1)
X_test = np.stack((test_band_1, test_band_2, test_band_3), axis=-1)

In [None]:
# Linear-scale counterparts of X_train / X_validate / X_test. The channel axis
# is inserted at position 1, giving (sample, channel, row, column), because
# every model in this notebook declares input_shape=(3, 75, 75) and the
# generator that consumes these arrays is created with
# data_format='channels_first'. No later cell transposes the *_lin arrays, so
# they have to be built channels-first here.
X_train_lin = np.stack(
    (train_band_1_lin, train_band_2_lin, train_band_3_lin), axis=1)
X_validate_lin = np.stack(
    (validate_band_1_lin, validate_band_2_lin, validate_band_3_lin), axis=1)
X_test_lin = np.stack(
    (test_band_1_lin, test_band_2_lin, test_band_3_lin), axis=1)

In [9]:
# Collect the is_iceberg column of the training frame as uint8 values,
# one label per row.
y_train = np.array([np.array(iceberg).astype('uint8') for iceberg in train_batch.is_iceberg])

In [None]:
y_train.shape

In [10]:
# Collect the is_iceberg column of the validation frame as uint8 values,
# one label per row.
y_validate = np.array([np.array(iceberg).astype('uint8') for iceberg in validate_batch.is_iceberg])

In [None]:
y_validate.shape

In [None]:
X_train.shape

In [None]:
X_validate.shape

In [11]:
def onehot(x):
    """Return the encoding of *x* produced by Keras' to_categorical."""
    return to_categorical(x)


In [12]:
# Replace the integer training labels with their one-hot encoding.
# NOTE(review): the variable is overwritten in place, so running this cell a
# second time would encode the already-encoded array — confirm before re-running.
y_train = onehot(y_train)

In [None]:
y_train[:5]

In [13]:
# Replace the integer validation labels with their one-hot encoding and show
# the resulting shape.
# NOTE(review): the variable is overwritten in place, so running this cell a
# second time would encode the already-encoded array — confirm before re-running.
y_validate = onehot(y_validate)
y_validate.shape

(481, 2)

In [None]:
y_validate[:5]

In [14]:
# Reorder the axes from (sample, row, column, channel) to
# (sample, channel, row, column), matching input_shape=(3, 75, 75) of the models.
X_train = X_train.transpose(0, 3, 1, 2)
X_validate = X_validate.transpose(0, 3, 1, 2)
X_test = X_test.transpose(0, 3, 1, 2)

In [15]:
X_validate.shape

(481, 3, 75, 75)

In [16]:
# Mean and standard deviation over all training pixels as float32 scalars;
# norm_input reads both.
mean_trainx = np.float32(X_train.mean())
std_trainx = np.float32(X_train.std())

In [17]:
def norm_input(x):
    """Standardise *x* with the training-set statistics.

    Subtracts the module-level ``mean_trainx`` and divides by the module-level
    ``std_trainx``.
    """
    centred = x - mean_trainx
    return centred / std_trainx

# Linear model

In [None]:
def get_linear_model():
    """Build and compile a softmax classifier on the flattened, normalised image.

    Returns:
        A compiled Sequential model: norm_input Lambda -> Flatten ->
        Dense(2, softmax), trained with Adam on categorical cross-entropy.
    """
    model = Sequential()
    model.add(Lambda(norm_input, input_shape=(3, img_height, img_width)))
    model.add(Flatten())
    model.add(Dense(2, activation='softmax'))
    model.compile(optimizer='Adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

In [None]:
lm = get_linear_model()

In [None]:
# Un-augmented, shuffled generators over the linear-scale arrays (batch size 32).
# NOTE(review): the fit_generator cells in this section pass `batches` and
# `validation_batches`, not the *_lin generators created here — confirm which
# pair was intended.
gen_lin = ImageDataGenerator(data_format='channels_first')
batches_lin = gen_lin.flow(X_train_lin, y_train, batch_size=32, shuffle = True)
validation_batches_lin = gen_lin.flow(X_validate_lin, y_validate, batch_size=32, shuffle = True)

In [None]:
# Train the linear model for one epoch with the optimizer as compiled.
# NOTE(review): `batches` / `validation_batches` are only assigned in a later
# cell (the augmenting generator), so that cell has to run before this one.
lm.fit_generator(batches, batches.n/64, epochs = 1, 
                validation_data = validation_batches, validation_steps = validation_batches.n/64)

In [None]:
# Switch the linear model to a learning rate of 0.1 for the next run.
# NOTE(review): this rebinds `optimizer.lr` to a plain Python float — confirm
# that the change reaches a training function that was already built.
lm.optimizer.lr=0.1

In [None]:
lm.fit_generator(batches, batches.n/64, epochs = 1, 
                validation_data = validation_batches, validation_steps = validation_batches.n/64)

In [None]:
# Switch the linear model to a learning rate of 0.01 for the next run.
# NOTE(review): this rebinds `optimizer.lr` to a plain Python float — confirm
# that the change reaches a training function that was already built.
lm.optimizer.lr=0.01

In [None]:
lm.fit_generator(batches, batches.n/64, epochs = 20, 
                validation_data = validation_batches, validation_steps = validation_batches.n/64)

# Fully Connected model

In [None]:
def get_fc_model():
    """Build and compile a one-hidden-layer fully connected classifier.

    The hidden layer uses ReLU, like the Dense(512) layer of the CNN models in
    this notebook. Softmax is kept only on the 2-unit output layer, where the
    activations are read as class probabilities; a softmax hidden layer would
    force its 512 activations to sum to one.

    Returns:
        A compiled Sequential model: norm_input Lambda -> Flatten ->
        Dense(512, relu) -> Dense(2, softmax), trained with Adam on
        categorical cross-entropy.
    """
    model = Sequential([
        Lambda(norm_input, input_shape = (3,img_height,img_width)),
        Flatten(),
        Dense(512, activation ='relu'),
        Dense(2, activation='softmax')
    ])
    model.compile(optimizer = 'Adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [None]:
fc = get_fc_model()

In [None]:
fc.fit_generator(batches, batches.n/64, epochs = 1, 
                validation_data = validation_batches, validation_steps = validation_batches.n/64)

In [None]:
# Switch the fully connected model to a learning rate of 0.1 for the next run.
# NOTE(review): this rebinds `optimizer.lr` to a plain Python float — confirm
# that the change reaches a training function that was already built.
fc.optimizer.lr = 0.1

In [None]:
fc.fit_generator(batches, batches.n/64, epochs = 1, 
                validation_data = validation_batches, validation_steps = validation_batches.n/64)

In [None]:
# Switch the fully connected model to a learning rate of 0.01 for the next run.
# NOTE(review): this rebinds `optimizer.lr` to a plain Python float — confirm
# that the change reaches a training function that was already built.
fc.optimizer.lr = 0.01

In [None]:
fc.fit_generator(batches, batches.n/64, epochs = 20, 
                validation_data = validation_batches, validation_steps = validation_batches.n/64)

# Convolution model - same as below

In [None]:
def get_cnn_model():
    """Build and compile the convolutional classifier.

    The network normalises its (3, 75, 75) input with norm_input, then applies
    three stages of Conv(3x3) -> BatchNorm(axis=1) -> Conv(3x3) -> MaxPool with
    32, 64 and 128 filters, followed by Flatten, BatchNorm, Dense(512, relu),
    BatchNorm and a 2-unit softmax output.

    Returns:
        The compiled Sequential model (Adam, categorical cross-entropy,
        accuracy metric).
    """
    model = Sequential()
    model.add(Lambda(norm_input, input_shape=(3, 75, 75)))

    # Convolutional stages; only the filter count changes between them.
    for n_filters in (32, 64, 128):
        model.add(Conv2D(n_filters, (3, 3), activation='relu'))
        model.add(BatchNormalization(axis=1))
        model.add(Conv2D(n_filters, (3, 3), activation='relu'))
        model.add(MaxPooling2D())

    # Classifier head.
    model.add(Flatten())
    model.add(BatchNormalization())
    model.add(Dense(512, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dense(2, activation='softmax'))

    model.compile(optimizer='Adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

In [None]:
cm = get_cnn_model()

In [None]:
cm.fit_generator(batches, batches.n/64, epochs = 1, 
                validation_data = validation_batches, validation_steps = validation_batches.n/64)

In [None]:
cm.summary()

In [None]:
# Switch the CNN to a learning rate of 0.1 for the next run.
# NOTE(review): this rebinds `optimizer.lr` to a plain Python float — confirm
# that the change reaches a training function that was already built.
cm.optimizer.lr = 0.1

In [None]:
cm.fit_generator(batches, batches.n/64, epochs = 1, 
                validation_data = validation_batches, validation_steps = validation_batches.n/64)

In [None]:
# Switch the CNN to a learning rate of 0.01 for the next run.
# NOTE(review): this rebinds `optimizer.lr` to a plain Python float — confirm
# that the change reaches a training function that was already built.
cm.optimizer.lr = 0.01

In [None]:
cm.fit_generator(batches, batches.n/64, epochs = 10, 
                validation_data = validation_batches, validation_steps = validation_batches.n/64)

In [18]:
# Augmenting generator: rotation_range of 90 plus horizontal and vertical
# flips, yielding shuffled batches of 64 in channels-first layout.
# NOTE(review): validation_batches is drawn from the same augmenting generator,
# so validation images are transformed too — confirm this is intended.
gen = ImageDataGenerator(data_format='channels_first',
                        rotation_range = 90.,
                        horizontal_flip = True,
                        vertical_flip = True)
batches = gen.flow(X_train, y_train, batch_size=64, shuffle = True)
validation_batches = gen.flow(X_validate, y_validate, batch_size=64, shuffle = True)

In [None]:
cm.fit_generator(batches, batches.n/64, epochs = 1, 
                validation_data = validation_batches, validation_steps = validation_batches.n/64)

In [None]:
# Switch the CNN to a learning rate of 0.1 for the next augmented run.
# NOTE(review): this rebinds `optimizer.lr` to a plain Python float — confirm
# that the change reaches a training function that was already built.
cm.optimizer.lr = 0.1

In [None]:
cm.fit_generator(batches, batches.n/64, epochs = 1, 
                validation_data = validation_batches, validation_steps = validation_batches.n/64)

In [None]:
# Switch the CNN to a learning rate of 0.01 for the next augmented run.
# NOTE(review): this rebinds `optimizer.lr` to a plain Python float — confirm
# that the change reaches a training function that was already built.
cm.optimizer.lr = 0.01

In [None]:
cm.fit_generator(batches, batches.n/64, epochs = 20, 
                validation_data = validation_batches, validation_steps = validation_batches.n/64)

# Convolution model

#### Tested with dropout but results were not great - worth trying varying amounts of dropout

In [19]:
def get_cnn_dropout_model():
    """Build and compile the CNN used for the augmentation tests and the ensemble.

    Despite the name, no Dropout layer is part of this architecture. The layer
    sequence is: norm_input Lambda on a (3, 75, 75) input; three stages of
    Conv(3x3) -> BatchNorm(axis=1) -> Conv(3x3) -> MaxPool with 32, 64 and 128
    filters; then Flatten, BatchNorm, Dense(512, relu), BatchNorm and a 2-unit
    softmax output.

    Returns:
        The compiled Sequential model (Adam, categorical cross-entropy,
        accuracy metric).
    """
    layers = [Lambda(norm_input, input_shape=(3, 75, 75))]

    # One convolutional stage per filter count.
    for filters in (32, 64, 128):
        layers += [
            Conv2D(filters, (3, 3), activation='relu'),
            BatchNormalization(axis=1),
            Conv2D(filters, (3, 3), activation='relu'),
            MaxPooling2D(),
        ]

    # Classifier head.
    layers += [
        Flatten(),
        BatchNormalization(),
        Dense(512, activation='relu'),
        BatchNormalization(),
        Dense(2, activation='softmax'),
    ]

    model = Sequential(layers)
    model.compile(optimizer='Adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

In [None]:
dn = get_cnn_dropout_model()

## 1. test linear vs log models

In [None]:
# Train `dn` for one epoch on the dB-scaled arrays (i == 0) and then one more
# epoch on the linear-scale arrays (i == 1).
# NOTE(review): `dn` is not re-created between the two passes, so the second
# result also reflects the first epoch; norm_input's mean/std are computed
# from X_train only — confirm both are acceptable for this comparison.
for i in range(2):
    gen = ImageDataGenerator()
    if i == 0:
        batches = gen.flow(X_train, y_train, batch_size=64, shuffle = True)
        validation_batches = gen.flow(X_validate, y_validate, batch_size=64, shuffle = True)
    else:
        batches = gen.flow(X_train_lin, y_train, batch_size=64, shuffle = True)
        validation_batches = gen.flow(X_validate_lin, y_validate, batch_size=64, shuffle = True)
    dn.fit_generator(batches, batches.n/64, epochs = 1, 
                validation_data = validation_batches, validation_steps = validation_batches.n/64)

## 2. test range of data augmentation

In [7]:
i = 1
'rotation is '+  str(20.+((i+1)*20)) + '%'

'rotation is 60.0%'

In [None]:
# Sweep the maximum rotation from 20 to 100 degrees in steps of 20, training
# `dn` for one epoch at each level.
for i in range(5):
    max_rotation = 20.+(i*20)
    gen = ImageDataGenerator(rotation_range = max_rotation)
    # A bare string expression inside a loop body is evaluated and discarded,
    # so the label has to be printed to appear next to the training output.
    print('max rotation is: ' + str(max_rotation) + ' degrees')
    batches = gen.flow(X_train, y_train, batch_size=64, shuffle = True)
    validation_batches = gen.flow(X_validate, y_validate, batch_size=64, shuffle = True)
    dn.fit_generator(batches, batches.n/64, epochs = 1, 
                validation_data = validation_batches, validation_steps = validation_batches.n/64)

In [None]:
# Sweep zoom_range from 0 to 0.20 in steps of 0.05, training `dn` for one
# epoch at each level.
for i in range(5):
    gen = ImageDataGenerator(zoom_range = (i*0.05))
    # A bare string expression inside a loop body is evaluated and discarded,
    # so the label is printed. zoom_range is a fraction (0.05 per step), hence
    # the percentage shown is i * 5 rather than the raw fraction.
    print('zoom is: +/-' + str(i*5) + '%')
    batches = gen.flow(X_train, y_train, batch_size=64, shuffle = True)
    validation_batches = gen.flow(X_validate, y_validate, batch_size=64, shuffle = True)
    dn.fit_generator(batches, batches.n/64, epochs = 1, 
                validation_data = validation_batches, validation_steps = validation_batches.n/64)

## 3. increase size of dataset for training

Switching from fit_generator to fit here, so the features of the validation set have to be computed first.

In [None]:
# Run `dn` over the validation generator and keep its outputs.
# NOTE(review): the second argument is validation_batches.n — confirm whether
# predict_generator counts batches or samples here. The generator was created
# with shuffle=True, so the output order may not match y_validate.
val_feat = dn.predict_generator(validation_batches, validation_batches.n)

In [None]:
# Run `dn` over the training generator and keep its outputs.
# NOTE(review): the second argument is batches.n*5 — confirm whether
# predict_generator counts batches or samples here. The generator was created
# with shuffle=True, so the output order may not match y_train.
aug_feat = dn.predict_generator(batches, batches.n*5)

In [None]:
# Labels for five passes over the training set, joined into one array along the
# sample axis. A plain `[y_train] * 5` is a Python list holding five separate
# arrays, not a single label array five times as long.
aug_labels = np.concatenate([y_train] * 5)

In [None]:
# Fit `dn` on the arrays computed in the cells above, one epoch, batch size 64.
# NOTE(review): aug_feat / val_feat are outputs of `dn` itself — confirm they
# have the (3, 75, 75) input shape `dn` was built with before fitting on them.
dn.fit(aug_feat, aug_labels, batch_size = 64, epochs = 1, validation_data = (val_feat, y_validate))

# NB: Run the model so that its weights are saved; this makes the ensembling below go faster

#### Skip to ensemble model, these calculations are no longer needed

In [None]:
dn.fit_generator(batches, batches.n/64, epochs = 1, 
                validation_data = validation_batches, validation_steps = validation_batches.n/64)

In [None]:
# Switch `dn` to a learning rate of 0.1 for the next run.
# NOTE(review): this rebinds `optimizer.lr` to a plain Python float — confirm
# that the change reaches a training function that was already built.
dn.optimizer.lr = 0.1

In [None]:
dn.fit_generator(batches, batches.n/64, epochs = 5, 
                validation_data = validation_batches, validation_steps = validation_batches.n/64)

In [None]:
# Switch `dn` to a learning rate of 0.01 for the next run.
# NOTE(review): this rebinds `optimizer.lr` to a plain Python float — confirm
# that the change reaches a training function that was already built.
dn.optimizer.lr = 0.01

In [None]:
dn.fit_generator(batches, batches.n/64, epochs = 15, 
                validation_data = validation_batches, validation_steps = validation_batches.n/64)

## Ensemble model

TODO: use what was learned in the previous section to update the ensemble model so that it generates better predictions.

In [23]:
def get_ensemble():
    """Train one fresh CNN that serves as a single member of the ensemble.

    A new model from get_cnn_dropout_model() is fitted on the module-level
    `batches` / `validation_batches` generators in four stages of 1, 4, 12 and
    16 epochs. `optimizer.lr` is assigned 0.1, 0.01 and 0.001 before stages
    two, three and four; only the last stage prints progress.

    Returns:
        The trained model.
    """
    model = get_cnn_dropout_model()

    # (learning rate to assign, or None to keep the compiled one; epochs; verbosity)
    stages = (
        (None, 1, 0),
        (0.1, 4, 0),
        (0.01, 12, 0),
        (0.001, 16, 1),
    )
    for new_lr, n_epochs, verbosity in stages:
        if new_lr is not None:
            # NOTE(review): plain attribute assignment — confirm it takes
            # effect once the training function has been built.
            model.optimizer.lr = new_lr
        model.fit_generator(batches, batches.n/64,
                            epochs=n_epochs, verbose=verbosity,
                            validation_data=validation_batches,
                            validation_steps=validation_batches.n/64)
    return model

In [24]:
# Train three models; each get_ensemble() call builds and fits a new network.
models = [get_ensemble() for i in range(3)]

  .format(self.name, input_shape))


Epoch 1/16
Epoch 2/16
Epoch 3/16
Epoch 4/16
Epoch 5/16
Epoch 6/16
Epoch 7/16
Epoch 8/16
Epoch 9/16
Epoch 10/16
Epoch 11/16
Epoch 12/16
Epoch 13/16
Epoch 14/16
Epoch 15/16
Epoch 16/16


  .format(self.name, input_shape))


Epoch 1/16
Epoch 2/16
Epoch 3/16
Epoch 4/16
Epoch 5/16
Epoch 6/16
Epoch 7/16
Epoch 8/16
Epoch 9/16
Epoch 10/16
Epoch 11/16
Epoch 12/16
Epoch 13/16
Epoch 14/16
Epoch 15/16
Epoch 16/16


  .format(self.name, input_shape))


Epoch 1/16
Epoch 2/16
Epoch 3/16
Epoch 4/16
Epoch 5/16
Epoch 6/16
Epoch 7/16
Epoch 8/16
Epoch 9/16
Epoch 10/16
Epoch 11/16
Epoch 12/16
Epoch 13/16
Epoch 14/16
Epoch 15/16
Epoch 16/16


In [25]:
model_path = path+'models/'

In [26]:
# Save each member's weights under model_path as cnn_statoil_<index>.pkl.
# NOTE(review): confirm that the on-disk format written by save_weights
# actually matches the '.pkl' suffix, and that model_path already exists.
for i,m in enumerate(models):
    m.save_weights(model_path + 'cnn_statoil_' + str(i) + '.pkl')

In [27]:
# Evaluate every ensemble member on the (un-augmented) validation arrays and
# stack the results, one row per model.
evals = np.array([m.evaluate(X_validate, y_validate, batch_size=256) for m in models])



In [28]:
evals.mean(axis=0)

array([ 0.31128727,  0.86555786])

In [29]:
# Predict the test set with every ensemble member and stack the results along
# a new leading (model) axis.
all_preds = np.stack([m.predict(X_test, batch_size = 256) for m in models])

In [30]:
all_preds.shape

(3, 8424, 2)

In [31]:
# Average the members' predictions over the model axis.
avg_preds = all_preds.mean(axis=0)

In [32]:
avg_preds[:5]

array([[  9.87748384e-01,   1.22515941e-02],
       [  2.95811206e-01,   7.04188824e-01],
       [  9.99622107e-01,   3.77853168e-04],
       [  1.49961412e-02,   9.85003889e-01],
       [  1.97269008e-01,   8.02730978e-01]], dtype=float32)

In [38]:
avg_preds[:,1]

array([  1.22515941e-02,   7.04188824e-01,   3.77853168e-04, ...,
         8.63429084e-02,   9.87904549e-01,   9.51587975e-01], dtype=float32)

In [33]:
evals.shape

(3, 2)

In [34]:
evals

array([[ 0.31043216,  0.86070687],
       [ 0.3627157 ,  0.84823284],
       [ 0.26071394,  0.88773388]])

In [35]:
y_validate[0:5]

array([[ 0.,  1.],
       [ 0.,  1.],
       [ 1.,  0.],
       [ 1.,  0.],
       [ 0.,  1.]])

In [36]:
validate_batch[:5]

Unnamed: 0,band_1,band_2,id,inc_angle,is_iceberg
10,"[-21.397552, -19.753859, -23.426783, -24.65221...","[-26.72291, -27.418192, -27.787899, -25.774536...",3aac67cd,44.624,1
1003,"[-25.098461, -25.098461, -24.320147, -21.05014...","[-29.62639, -29.62639, -28.757122, -29.180954,...",b7519a52,42.559,1
1006,"[-21.582905, -15.472338, -16.417433, -16.72227...","[-25.104729, -24.326412, -28.763432, -32.92899...",204941f0,42.4664,0
101,"[-13.271194, -12.898959, -14.867657, -16.54327...","[-22.941357, -23.540695, -24.41008, -24.879778...",f9209504,,0
1012,"[-13.523142, -10.304675, -11.433078, -9.585804...","[-21.386665, -21.076504, -20.776958, -22.21468...",204afe46,32.801,1


In [43]:
# Build the submission table: the test ids plus column 1 of the averaged
# predictions, clipped to the range [0.025, 0.975].
clipped_probabilities = avg_preds[:, 1].clip(0.025, 0.975)
submission = pd.DataFrame({'id': test_batch['id']})
submission['is_iceberg'] = clipped_probabilities
submission.to_csv(path + 'submission17124.csv', index=False)

In [42]:
avg_preds[:,1].clip(0.025, 0.975)

array([ 0.025     ,  0.70418882,  0.025     , ...,  0.08634291,
        0.97500002,  0.95158798], dtype=float32)

In [52]:
from IPython.display import FileLink
import os, sys
submit_path = os.getcwd()
submit_path

'/home/ubuntu/courses/deeplearning1/nbs/statoil-nb'

In [53]:
submit_path = submit_path+'/../data/statoil'

In [55]:
FileLink(submit_path+'/submission17124.csv')