In [1]:
import os
from imp import reload

from keras.layers import Convolution2D
from matplotlib import pyplot as plt
%matplotlib inline

Using Theano backend.
Using gpu device 0: Tesla K80 (CNMeM is disabled, cuDNN 5103)


In [2]:
import my_utils;reload(my_utils)

<module 'my_utils' from 'my_utils.pyc'>

In [3]:
# Import the local VGG wrapper and reload it so that edits made to
# my_vgg16.py while the kernel is running are picked up.
import my_vgg16
reload(my_vgg16)

# Keep the class around (used later to build fresh models) and create the
# working instance used throughout the notebook.
Vgg = my_vgg16.Vgg
vgg = Vgg()

In [4]:
# Disabled alternative (the guard is hard-coded to False): build `vgg` from the
# separate `vgg16` module instead of `my_vgg16`.
if False:
    import vgg16; reload(vgg16)
    from vgg16 import Vgg16
    vgg = Vgg16()

In [5]:
# Dataset and artifact locations. `path`, `sample_path` and `models_path` end
# with "/" because they are concatenated directly with file/folder names;
# `weights_path` has no trailing slash and is joined with "/<file>".
path = "data/dogscats/"
weights_path = "data/weights"
sample_path = "data/dogscats/sample/"
models_path = "data/dogscats/models/"

batch_size = 64
# `path` already ends with "/", so append the bare folder name. The previous
# `path + "/train"` produced "data/dogscats//train".
train_batches = vgg.get_batches(path + "train", batch_size=batch_size)
# Validation batches use twice the training batch size.
valid_batches = vgg.get_batches(path + "valid", batch_size=batch_size*2)

# Adapt the pretrained model to the classes found in the training batches.
vgg.finetune(train_batches)

Found 23000 images belonging to 2 classes.
Found 2000 images belonging to 2 classes.


In [26]:
# Train the fine-tuned model for a single epoch, passing the validation
# batches alongside the training batches.
vgg.fit(train_batches, valid_batches, nb_epoch=1)

Epoch 1/1


In [6]:
# Checkpoint of the model after one epoch of training. The save call is kept
# commented out so re-running the notebook restores the stored weights instead
# of overwriting them. The directory comes from `weights_path` (same resulting
# path as before: "data/weights/test_vgg_1_epoch.h5").
# vgg.model.save_weights(weights_path + "/test_vgg_1_epoch.h5")
vgg.model.load_weights(weights_path + "/test_vgg_1_epoch.h5")

In [7]:
# Disabled (guard hard-coded to False): predict on the test set with the
# fine-tuned `vgg` model and write a baseline submission file.
if False:
    predict_batch = vgg.get_batches("data/dogscats/test1/", batch_size=5, shuffle=False)
    # The image id is the numeric file stem ("<subdir>/<id>.<ext>"). Taking it
    # from the basename does not depend on the sub-directory name length or on
    # the extension length, unlike a fixed slice. list() keeps `ids` a concrete
    # sequence on both Python 2 and Python 3.
    ids = list(map(lambda t: int(os.path.splitext(os.path.basename(t))[0]),
                   predict_batch.filenames))
    predictions = vgg.model.predict_generator(predict_batch, val_samples=predict_batch.nb_sample)
    # Column 1 is used as the "is dog" score; it is clipped to [0.05, 0.95]
    # (presumably to bound the log-loss penalty of confident mistakes).
    isdog = predictions[:,1]
    isdog = isdog.clip(0.05, 0.95)

    # create_link lives in my_utils; the bare name was undefined in this notebook.
    my_utils.create_link(ids, isdog, submission_filename="data/submission.csv")



## So far we have seen that we can:

1) Load a pretrained model  
2) Finetune the model removing the last layer and adding a new one with custom number of outputs  
3) Place in the top 50% of submissions on Kaggle  
4) Make the model overfit  

## Improving the model

If we train the model for 2 epochs it is likely to overfit; to avoid this, the creators of VGG used dropout of 0.5.
But we can do better by first forcing the model to overfit and then using more modern techniques to reduce the overfitting. So what we need is
to make the model overfit the data and then try to contain it.

### So next steps

1) Split model into two blocks
    * Convolutional layers
    * FC layers

2) Ensure we can overfit FC layers
    * Reduce the VGG16 weights by multiplying them by 0.5
    * Train the model using numpy arrays
    * Reduce overfitting on FC using data augmentation

3) Batch normalization FC model
    * FC model with batch normalization after each Dense layer
        * Dropout of 0.6
        * Train and overfit
    * FC model with VGG16 BN weights

In [22]:
from keras.models import Sequential

# Split the trained model's layers into the convolutional part and the fully
# connected part, using Convolution2D as the layer type to split on, then wrap
# the convolutional layers in their own Sequential model.
vgg_conv_layers, vgg_fc_layers = my_utils.split_on_last_layer(
    vgg.model, Convolution2D)
conv_model = Sequential(vgg_conv_layers)

In [23]:
# Create a new model with only the FC layers, without dropout and with the
# weights divided by 2 (both are expected to happen inside
# my_utils.create_fc_model -- NOTE(review): confirm against that helper).
# The input shape is the per-sample output shape of the last conv layer.
vgg_fc_weights = [layer.get_weights() for layer in vgg_fc_layers]
fc_model = my_utils.create_fc_model(vgg_conv_layers[-1].output_shape[1:], vgg_fc_weights)

In [24]:
batch_size = 64

# Unshuffled generators: the order of the batches must match the order of
# `.classes` so that features computed from them line up with the labels.
train_batches = vgg.get_batches(path + 'train', shuffle=False,
                                batch_size=batch_size)
valid_batches = vgg.get_batches(path + 'valid', shuffle=False,
                                batch_size=batch_size)

# Integer class ids per image, and their one-hot encoded counterparts.
train_classes = train_batches.classes
valid_classes = valid_batches.classes
train_labels = my_utils.onehot(train_classes)
valid_labels = my_utils.onehot(valid_classes)

Found 23000 images belonging to 2 classes.
Found 2000 images belonging to 2 classes.


In [25]:
# Disabled (guard hard-coded to False): one-off precomputation of the conv
# model's outputs for every training/validation image, saved to disk so the
# next cell can load them instead of recomputing. Flip the guard to True to
# regenerate the cached arrays.
if False:
    train_features = conv_model.predict_generator(train_batches, train_batches.nb_sample)
    valid_features = conv_model.predict_generator(valid_batches, valid_batches.nb_sample)

    models_path = "data/dogscats/models/"
    my_utils.save_array(models_path + 'train_convlayer_features.bc', train_features)
    my_utils.save_array(models_path + 'valid_convlayer_features.bc', valid_features)

In [26]:
# Load the cached conv-layer features written by the (disabled) precompute cell.
models_path = "data/dogscats/models/"
train_features = my_utils.load_array(models_path + 'train_convlayer_features.bc')
valid_features = my_utils.load_array(models_path + 'valid_convlayer_features.bc')

In [27]:
# Shapes of the cached feature arrays: training first, then validation.
[train_features.shape, valid_features.shape]

[(23000, 512, 14, 14), (2000, 512, 14, 14)]

In [20]:
# Train the dropout-free FC model directly on the cached conv features.
fc_model.fit(
    train_features,
    train_labels,
    nb_epoch=6,
    batch_size=batch_size,
    validation_data=(valid_features, valid_labels)
)

Train on 23000 samples, validate on 2000 samples
Epoch 1/6

KeyboardInterrupt: 

## Let's use data augmentation to reduce overfitting

In [28]:
from keras.preprocessing import image

# Generator that produces randomly augmented images: small rotations, shifts
# and zooms plus horizontal flips.
gen = image.ImageDataGenerator(
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True
)

In [29]:
# Only the training stream is augmented (and shuffled). Validation images are
# read without the augmenting generator so the validation metrics are measured
# on undistorted data and stay comparable between runs; previously `gen` was
# passed for validation as well.
train_batches = vgg.get_batches(path+'train', gen, shuffle=True, batch_size=batch_size)
valid_batches = vgg.get_batches(path+'valid', shuffle=False, batch_size=batch_size)

# Rebuild both halves: the conv stack from the split layers and a fresh FC
# model initialised from the saved FC weights.
conv_model = Sequential(vgg_conv_layers)
fc_model = my_utils.create_fc_model(vgg_conv_layers[-1].output_shape[1:], vgg_fc_weights)
# Freeze the convolutional layers so that only the FC part is trained.
for layer in conv_model.layers: layer.trainable = False

Found 23000 images belonging to 2 classes.
Found 2000 images belonging to 2 classes.


In [30]:
# Append the FC model to the conv model, giving a single end-to-end model that
# can be fed images from the batch generators.
conv_model.add(fc_model)

In [31]:
# Compile with the optimizer object exposed by my_utils (presumably an RMSprop
# instance -- verify in my_utils), categorical cross-entropy loss, and accuracy
# as the reported metric.
conv_model.compile(optimizer=my_utils.rms_optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

In [32]:
# Two epochs over the training generator; each epoch covers every training
# sample once and validates on the whole validation set.
conv_model.fit_generator(
    train_batches,
    samples_per_epoch=train_batches.nb_sample,
    nb_epoch=2,
    validation_data=valid_batches,
    nb_val_samples=valid_batches.nb_sample
)

Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7f4fe91d4690>

## Fully connected model with batch normalization

We have seen that data augmentation contains overfitting a bit, but not much. Let's use a technique closer to the state of the art: *batch normalization*.

So first of all I'm going to train an FC model using batch normalization with dropout, and then join it with the convolutional layers.

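Batch normalization has become a standard part of deep learning architectures. Because it normalizes the activations of a layer (not its weights), it keeps their scale stable from batch to batch, which eases back-propagation problems such as vanishing or exploding gradients.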

In [45]:
# Build a VGG variant with batch normalization enabled and split its layers
# into the convolutional part and the fully connected part.
vgg_bn = Vgg(batch_norm=True)
vgg_bn_conv_layers, vgg_bn_fc_layers = my_utils.split_on_last_layer(vgg_bn.model, Convolution2D)

In [46]:
# Stand-alone FC model with batch normalization and dropout of 0.5; its input
# shape is the per-sample output shape of the last conv layer.
fc_bn_model = my_utils.create_fc_bn_model(input_shape=vgg_bn_conv_layers[-1].output_shape[1:], dropout=0.5)

In [47]:
# Copy weights layer by layer from the BN VGG's FC layers into the new model.
# NOTE(review): zip() stops at the shorter sequence, so this assumes both have
# the same layers in the same order -- confirm against create_fc_bn_model.
for from_layer, to_layer in zip(vgg_bn_fc_layers, fc_bn_model.layers):
    to_layer.set_weights(from_layer.get_weights())

In [48]:
from keras.layers import Dense

# Drop the model's last layer and put a fresh 2-way softmax layer in its place,
# then compile with Adam, categorical cross-entropy and accuracy.
fc_bn_model.pop()
# for layer in fc_bn_model.layers: layer.trainable = False
fc_bn_model.add(Dense(2, activation="softmax"))
fc_bn_model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"]
)

In [49]:
# fc_bn_model.optimizer.lr.set_value(0.0001)

In [50]:
# Train the batch-normalized FC model for 4 epochs on the cached conv features.
fc_bn_model.fit(train_features, train_labels, nb_epoch=4,
                 batch_size=batch_size,
                 validation_data=(valid_features, valid_labels))

Train on 23000 samples, validate on 2000 samples
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x7f4f869d4bd0>

In [59]:
# Set the optimizer's learning rate to 1e-4 and train one more epoch.
# NOTE: re-running this cell trains an additional epoch each time.
fc_bn_model.optimizer.lr.set_value(0.0001)
fc_bn_model.fit(train_features, train_labels, nb_epoch=1,
                 batch_size=batch_size,
                 validation_data=(valid_features, valid_labels))

Train on 23000 samples, validate on 2000 samples
Epoch 1/1


<keras.callbacks.History at 0x7f4f7ee33b50>

In [60]:
# Persist the trained FC+BN weights under the weights directory.
fc_bn_model.save_weights(weights_path + "/fc_bn_model_5.h5")

In [61]:
# Restore the FC+BN weights from the checkpoint file, so the notebook can
# resume from here without retraining.
fc_bn_model.load_weights(weights_path + "/fc_bn_model_5.h5")

In [62]:
# Assemble the final end-to-end model: conv layers from a fresh Vgg() plus
# FC+BN layers initialised from the trained `fc_bn_model`.
# Statement order matters here: layers are frozen before the new Dense layer
# is appended, and weights are copied only after the layers are in the model.
from keras.layers import Dense
from keras.models import Sequential

# Conv part of a newly constructed Vgg model; all of its layers are frozen.
conv_layers, fc_layers = my_utils.split_on_last_layer(Vgg().model, Convolution2D)
final_model = Sequential(conv_layers)
for layer in final_model.layers: layer.trainable = False
# FC+BN layers as a plain list. NOTE(review): dropout here is 0.6 while
# `fc_bn_model` was created with dropout=0.5 -- confirm this is intended.
fc_bn_layers = my_utils.create_fc_bn_layers(input_shape=final_model.layers[-1].output_shape[1:], dropout=0.6)
# Drop the list's last layer and freeze the remaining ones; the Dense layer
# appended afterwards is not touched by this loop.
fc_bn_layers.pop()
for layer in fc_bn_layers: layer.trainable = False
fc_bn_layers.append(Dense(2, activation="softmax"))

for layer in fc_bn_layers: final_model.add(layer)

# Copy the trained weights pairwise, in layer order; zip() stops at the
# shorter of the two sequences.
for from_layer, to_layer in zip(fc_bn_model.layers, fc_bn_layers):
    to_layer.set_weights(from_layer.get_weights())

# final_model.add(fc_model)

In [63]:
# Compile the assembled model: Adam optimizer, categorical cross-entropy loss,
# accuracy as the reported metric.
final_model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"]
)

In [64]:
batch_size = 64
# `path` already ends with "/", so append the bare folder name. The previous
# `path + "/train"` produced "data/dogscats//train".
train_batches = vgg.get_batches(path + "train", batch_size=batch_size)
valid_batches = vgg.get_batches(path + "valid", batch_size=batch_size)
# One epoch over the full training set, validated on the full validation set.
final_model.fit_generator(train_batches, samples_per_epoch=train_batches.nb_sample, nb_epoch=1,
                        validation_data=valid_batches, nb_val_samples=valid_batches.nb_sample)

Found 23000 images belonging to 2 classes.
Found 2000 images belonging to 2 classes.
Epoch 1/1


<keras.callbacks.History at 0x7f4f7653bad0>

In [65]:
# Persist the final model's weights.
# NOTE(review): the file name says "dropout_08", but the FC+BN layers of this
# model were created with dropout=0.6 -- confirm the intended name.
final_model.save_weights(weights_path + "/final_1_dropout_08.h5")

In [66]:
# Predict on the test set with the final model and write the submission file.
# The generator must not shuffle, so predictions stay aligned with `filenames`.
predict_batch = vgg.get_batches("data/dogscats/test1/", batch_size=5, shuffle=False)
# The image id is the numeric file stem ("<subdir>/<id>.<ext>"). Taking it from
# the basename does not depend on the sub-directory name length or on the
# extension length, unlike the former fixed slice t[7:-4]. list() keeps `ids`
# a concrete sequence on both Python 2 and Python 3.
ids = list(map(lambda t: int(os.path.splitext(os.path.basename(t))[0]),
               predict_batch.filenames))
predictions = final_model.predict_generator(predict_batch, val_samples=predict_batch.nb_sample)
# Column 1 is used as the "is dog" score; it is clipped to [0.05, 0.95]
# (presumably to bound the log-loss penalty of confident mistakes).
isdog = predictions[:,1]
isdog = isdog.clip(0.05, 0.95)

my_utils.create_link(ids, isdog, submission_filename="data/submission_final_3.csv")

Found 12500 images belonging to 1 classes.
(12500, 2)


In [67]:
# Same call, with the same arguments, as the one at the end of the previous
# cell; it relies on `ids` and `isdog` still being defined in the kernel.
my_utils.create_link(ids, isdog, submission_filename="data/submission_final_3.csv")

(12500, 2)
