# Deep Learning: Dogs vs Cats Analysis

In [14]:
%matplotlib inline
import math
import numpy as np
import utils; reload(utils)
from utils import *

from sympy import Symbol
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Lambda, Dense
from keras import backend as K
from matplotlib import pyplot as plt

## Workflow for each analysis type (e.g basic, 1 Dense layer...):

1. Create model
2. Train it with the default "Learning Rate" of 0.01 just 1 epoch so we see the speed with what the accuracy is increasing.
3. Increase the "Learning Rate" to 0.1 and train the model between 4 and 12 epochs.
4. Decrease the "Learning Rate" to 0.01 and train the model 4 epochs.
5. Decrease the "Learning Rate" to 0.001 and train the model 2 epochs.
6. Decrease the "Learning Rate" to 0.0001 and train the model 1 epoch.

In [15]:
# We set the "seed" so we make the results a bit more predictable.
np.random.seed(1)

In [16]:
path = 'sample/'
batch_size = 1

In [17]:
vgg = Vgg16()

  .format(self.name, input_shape))


In [18]:
gen = image.ImageDataGenerator()
train_batches = gen.flow_from_directory(path + '/train', target_size=(224,224),
                class_mode='categorical', shuffle=True, batch_size=batch_size)
val_batches = gen.flow_from_directory(path + '/valid', target_size=(224,224),
                class_mode='categorical', shuffle=True, batch_size=batch_size)
test_batches = gen.flow_from_directory(path + '/test', target_size=(224,224),
                class_mode='categorical', shuffle=True, batch_size=batch_size)

Found 40 images belonging to 2 classes.
Found 20 images belonging to 2 classes.
Found 20 images belonging to 1 classes.


In [19]:
vgg.model.pop()
for layer in vgg.model.layers: layer.trainable=False
vgg.model.add(Dense(train_batches.nb_class, activation='softmax'))
vgg.model.compile(optimizer=Adam(lr=0.01),
                loss='categorical_crossentropy', metrics=['accuracy'])
vgg.model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
lambda_2 (Lambda)                (None, 3, 224, 224)   0           lambda_input_2[0][0]             
____________________________________________________________________________________________________
zeropadding2d_14 (ZeroPadding2D) (None, 3, 226, 226)   0           lambda_2[0][0]                   
____________________________________________________________________________________________________
convolution2d_14 (Convolution2D) (None, 64, 224, 224)  1792        zeropadding2d_14[0][0]           
____________________________________________________________________________________________________
zeropadding2d_15 (ZeroPadding2D) (None, 64, 226, 226)  0           convolution2d_14[0][0]           
___________________________________________________________________________________________

In [20]:
vgg.model.fit_generator(train_batches, samples_per_epoch=train_batches.nb_sample,
                nb_epoch=1, validation_data=val_batches, nb_val_samples=val_batches.nb_sample)

Epoch 1/1


<keras.callbacks.History at 0x11f631990>

In [21]:
vgg.model.optimizer.lr=0.1

In [22]:
vgg.model.fit_generator(train_batches, samples_per_epoch=train_batches.nb_sample,
                nb_epoch=4, validation_data=val_batches, nb_val_samples=val_batches.nb_sample)

Epoch 1/4
Epoch 2/4
Epoch 3/4

KeyboardInterrupt: 

In [6]:
vgg.model.optimizer.lr=0.01

In [7]:
vgg.model.fit_generator(train_batches, samples_per_epoch=train_batches.nb_sample,
                nb_epoch=8, validation_data=val_batches, nb_val_samples=val_batches.nb_sample)

In [8]:
vgg.model.optimizer.lr=0.001

In [None]:
vgg.model.fit_generator(train_batches, samples_per_epoch=train_batches.nb_sample,
                nb_epoch=2, validation_data=val_batches, nb_val_samples=val_batches.nb_sample)

# With Data Augmentation

In [28]:
vgg = Vgg16()

  .format(self.name, input_shape))


In [38]:
gen = image.ImageDataGenerator(rotation_range=8, width_shift_range=0.08, shear_range=0.3,
                               height_shift_range=0.08, zoom_range=0.08)

train_batches = gen.flow_from_directory(path + '/train', target_size=(224,224),
                class_mode='categorical', shuffle=True, batch_size=batch_size)
val_batches = gen.flow_from_directory(path + '/valid', target_size=(224,224),
                class_mode='categorical', shuffle=True, batch_size=batch_size)
test_batches = gen.flow_from_directory(path + '/test', target_size=(224,224),
                class_mode='categorical', shuffle=True, batch_size=batch_size)

Found 40 images belonging to 2 classes.
Found 20 images belonging to 2 classes.
Found 20 images belonging to 1 classes.


In [39]:
vgg.model.pop()
for layer in vgg.model.layers: layer.trainable=False
vgg.model.add(Dense(train_batches.nb_class, activation='softmax'))
vgg.model.compile(optimizer=Adam(lr=0.01),
                loss='categorical_crossentropy', metrics=['accuracy'])
vgg.model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
lambda_5 (Lambda)                (None, 3, 224, 224)   0           lambda_input_5[0][0]             
____________________________________________________________________________________________________
zeropadding2d_53 (ZeroPadding2D) (None, 3, 226, 226)   0           lambda_5[0][0]                   
____________________________________________________________________________________________________
convolution2d_53 (Convolution2D) (None, 64, 224, 224)  1792        zeropadding2d_53[0][0]           
____________________________________________________________________________________________________
zeropadding2d_54 (ZeroPadding2D) (None, 64, 226, 226)  0           convolution2d_53[0][0]           
___________________________________________________________________________________________

In [36]:
vgg.model.fit_generator(train_batches, samples_per_epoch=train_batches.nb_sample,
                nb_epoch=1, validation_data=val_batches, nb_val_samples=val_batches.nb_sample)

Epoch 1/1

KeyboardInterrupt: 

In [None]:
vgg.model.optimizer.lr=0.1

In [13]:
vgg.model.fit_generator(train_batches, samples_per_epoch=train_batches.nb_sample,
                nb_epoch=4, validation_data=val_batches, nb_val_samples=val_batches.nb_sample)

Epoch 1/4
Epoch 2/4

KeyboardInterrupt: 

In [None]:
vgg.model.optimizer.lr=0.01

In [None]:
vgg.model.fit_generator(train_batches, samples_per_epoch=train_batches.nb_sample,
                nb_epoch=8, validation_data=val_batches, nb_val_samples=val_batches.nb_sample)

In [None]:
vgg.model.optimizer.lr=0.001

In [None]:
vgg.model.fit_generator(train_batches, samples_per_epoch=train_batches.nb_sample,
                nb_epoch=2, validation_data=val_batches, nb_val_samples=val_batches.nb_sample)

# With Data Augmentation + Training all Dense layers

In [None]:
vgg = Vgg16()

In [None]:
gen = image.ImageDataGenerator(rotation_range=8, width_shift_range=0.08, shear_range=0.3,
                               height_shift_range=0.08, zoom_range=0.08)
train_batches = gen.flow_from_directory(path + '/train', target_size=(224,224),
                class_mode='categorical', shuffle=True, batch_size=batch_size)
val_batches = gen.flow_from_directory(path + '/valid', target_size=(224,224),
                class_mode='categorical', shuffle=True, batch_size=batch_size)
test_batches = gen.flow_from_directory(path + '/test', target_size=(224,224),
                class_mode='categorical', shuffle=True, batch_size=batch_size)

In [None]:
vgg.model.pop()

vgg.model.add(Dense(train_batches.nb_class, activation='softmax'))
dense_layers_idx = [index for index, layer in enumerate(vgg.model.layers) if type(layer) is Dense]
for idx, layer in enumerate(vgg.model.layers):
    layer.trainable = False
    if idx in dense_layers_idx:
        layer.trainable = True

vgg.model.compile(optimizer=Adam(lr=0.01),
                loss='categorical_crossentropy', metrics=['accuracy'])
vgg.model.summary()

In [None]:
vgg.model.fit_generator(train_batches, samples_per_epoch=train_batches.nb_sample,
                nb_epoch=1, validation_data=val_batches, nb_val_samples=val_batches.nb_sample)

In [None]:
vgg.model.optimizer.lr=0.1

In [None]:
vgg.model.fit_generator(train_batches, samples_per_epoch=train_batches.nb_sample,
                nb_epoch=4, validation_data=val_batches, nb_val_samples=val_batches.nb_sample)

Epoch 1/4
Epoch 2/4

In [None]:
vgg.model.optimizer.lr=0.01

In [None]:
vgg.model.fit_generator(train_batches, samples_per_epoch=train_batches.nb_sample,
                nb_epoch=8, validation_data=val_batches, nb_val_samples=val_batches.nb_sample)

In [None]:
vgg.model.optimizer.lr=0.001

In [None]:
vgg.model.fit_generator(train_batches, samples_per_epoch=train_batches.nb_sample,
                nb_epoch=2, validation_data=val_batches, nb_val_samples=val_batches.nb_sample)

# With Data Augmentation + Training all layers

In [23]:
vgg = Vgg16()

  .format(self.name, input_shape))


In [None]:
gen = image.ImageDataGenerator(rotation_range=8, width_shift_range=0.08, shear_range=0.3,
                               height_shift_range=0.08, zoom_range=0.08)
train_batches = gen.flow_from_directory(path + '/train', target_size=(224,224),
                class_mode='categorical', shuffle=True, batch_size=batch_size)
val_batches = gen.flow_from_directory(path + '/valid', target_size=(224,224),
                class_mode='categorical', shuffle=True, batch_size=batch_size)
test_batches = gen.flow_from_directory(path + '/test', target_size=(224,224),
                class_mode='categorical', shuffle=True, batch_size=batch_size)

In [None]:
vgg.model.pop()

vgg.model.add(Dense(train_batches.nb_class, activation='softmax'))
for idx, layer in enumerate(vgg.model.layers):
    layer.trainable = True

vgg.model.compile(optimizer=Adam(lr=0.01),
                loss='categorical_crossentropy', metrics=['accuracy'])
vgg.model.summary()

In [None]:
vgg.model.fit_generator(train_batches, samples_per_epoch=train_batches.nb_sample,
                nb_epoch=1, validation_data=val_batches, nb_val_samples=val_batches.nb_sample)

In [None]:
vgg.model.optimizer.lr=0.1

In [None]:
vgg.model.fit_generator(train_batches, samples_per_epoch=train_batches.nb_sample,
                nb_epoch=4, validation_data=val_batches, nb_val_samples=val_batches.nb_sample)

Epoch 1/4
Epoch 2/4

In [None]:
vgg.model.optimizer.lr=0.01

In [None]:
vgg.model.fit_generator(train_batches, samples_per_epoch=train_batches.nb_sample,
                nb_epoch=8, validation_data=val_batches, nb_val_samples=val_batches.nb_sample)

In [None]:
vgg.model.optimizer.lr=0.001

In [None]:
vgg.model.fit_generator(train_batches, samples_per_epoch=train_batches.nb_sample,
                nb_epoch=2, validation_data=val_batches, nb_val_samples=val_batches.nb_sample)

# With Data Augmentation + Training all layers + lower Dropout (0.2)

In [24]:
vgg = Vgg16()

  .format(self.name, input_shape))


In [25]:
gen = image.ImageDataGenerator(rotation_range=8, width_shift_range=0.08, shear_range=0.3,
                               height_shift_range=0.08, zoom_range=0.08)
train_batches = gen.flow_from_directory(path + '/train', target_size=(224,224),
                class_mode='categorical', shuffle=True, batch_size=batch_size)
val_batches = gen.flow_from_directory(path + '/valid', target_size=(224,224),
                class_mode='categorical', shuffle=True, batch_size=batch_size)
test_batches = gen.flow_from_directory(path + '/test', target_size=(224,224),
                class_mode='categorical', shuffle=True, batch_size=batch_size)

Found 40 images belonging to 2 classes.
Found 20 images belonging to 2 classes.
Found 20 images belonging to 1 classes.


In [27]:
vgg.model.pop()

vgg.model.add(Dense(train_batches.nb_class, activation='softmax'))
dropout_layers_idx = [index for index, layer in enumerate(vgg.model.layers) if type(layer) is Dropout]

for idx, layer in enumerate(vgg.model.layers):
    layer.p = 0.2

vgg.model.compile(optimizer=Adam(lr=0.01),
                loss='categorical_crossentropy', metrics=['accuracy'])
vgg.model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
lambda_4 (Lambda)                (None, 3, 224, 224)   0           lambda_input_4[0][0]             
____________________________________________________________________________________________________
zeropadding2d_40 (ZeroPadding2D) (None, 3, 226, 226)   0           lambda_4[0][0]                   
____________________________________________________________________________________________________
convolution2d_40 (Convolution2D) (None, 64, 224, 224)  1792        zeropadding2d_40[0][0]           
____________________________________________________________________________________________________
zeropadding2d_41 (ZeroPadding2D) (None, 64, 226, 226)  0           convolution2d_40[0][0]           
___________________________________________________________________________________________

In [None]:
vgg.model.fit_generator(train_batches, samples_per_epoch=train_batches.nb_sample,
                nb_epoch=1, validation_data=val_batches, nb_val_samples=val_batches.nb_sample)

In [None]:
vgg.model.optimizer.lr=0.1

In [None]:
vgg.model.fit_generator(train_batches, samples_per_epoch=train_batches.nb_sample,
                nb_epoch=4, validation_data=val_batches, nb_val_samples=val_batches.nb_sample)

Epoch 1/4
Epoch 2/4

In [None]:
vgg.model.optimizer.lr=0.01

In [None]:
vgg.model.fit_generator(train_batches, samples_per_epoch=train_batches.nb_sample,
                nb_epoch=8, validation_data=val_batches, nb_val_samples=val_batches.nb_sample)

In [None]:
vgg.model.optimizer.lr=0.001

In [None]:
vgg.model.fit_generator(train_batches, samples_per_epoch=train_batches.nb_sample,
                nb_epoch=2, validation_data=val_batches, nb_val_samples=val_batches.nb_sample)

# Predict Test set + create Kaggle submission file

In [46]:
predictions = vgg.model.predict_generator(test_batches, test_batches.nb_sample)
isdog = predictions[:,1]

In [48]:
isdog = isdog.clip(min=0.05, max=0.95)

In [67]:
#Extract imageIds from the filenames in our test/unknown directory 
filenames = test_batches.filenames

ids = np.array([int(f[8:f.find('.')]) for f in filenames])

In [68]:
subm = np.stack([ids,isdog], axis=1)
subm[:5]

array([[  1.    ,   0.339 ],
       [ 10.    ,   0.95  ],
       [ 11.    ,   0.7307],
       [ 12.    ,   0.95  ],
       [ 13.    ,   0.4694]])

In [69]:
submission_file_name = 'submission1.csv'
np.savetxt(submission_file_name, subm, fmt='%d,%.5f', header='id,label', comments='')

In [70]:
from IPython.display import FileLink
FileLink(submission_file_name)