This notebooks finetunes resnet50 by adding changing the final Dense layer from 1000 to 2 outputs and trains it to classify between cats and dogs.

In [1]:
import numpy as np

import tensorflow as tf

## Use base keras instead of tf.keras because of: https://github.com/tensorflow/tensorflow/issues/11868
# from tensorflow.contrib.keras import layers
# from tensorflow.contrib.keras import models
# from tensorflow.contrib.keras import optimizers
# from tensorflow.contrib.keras import applications
# from tensorflow.contrib.keras.python.keras.preprocessing import image
# from tensorflow.contrib.keras.python.keras.applications import imagenet_utils
##
from keras import layers
from keras import models
from keras import optimizers
from keras import applications
from keras.preprocessing import image
from keras.applications import imagenet_utils
##

In [2]:
def get_batches(dirpath, gen=image.ImageDataGenerator(), shuffle=True, batch_size=64, class_mode='categorical'):
    return gen.flow_from_directory(dirpath, target_size=(224, 224), class_mode=class_mode, shuffle=shuffle, batch_size=batch_size)

In [3]:
batch_size = 64

In [4]:
train_batches = get_batches('./data/train', batch_size=batch_size)
# train_batches = get_batches('./data/train', gen=aug_gen, batch_size=batch_size)

Found 22797 images belonging to 2 classes.


In [5]:
val_batches = get_batches('./data/valid', batch_size=batch_size)
# val_batches = get_batches('./data/valid', gen=aug_gen, batch_size=batch_size)

Found 2203 images belonging to 2 classes.


Model creation

In [6]:
resnet50 = applications.ResNet50(weights="imagenet", include_top=False, input_shape=(224, 224, 3))

In [7]:
for layer in resnet50.layers:
    layer.trainable = False

In [53]:
##

In [9]:
x = resnet50.output

In [10]:
x = layers.BatchNormalization(axis=3)(x)
x = layers.Flatten(name='flatten')(x)
x = layers.Dense(train_batches.num_class, activation='softmax', name='predictions')(x)

In [11]:
model = models.Model(inputs=resnet50.input, outputs=x)

In [13]:
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_1 (InputLayer)             (None, 224, 224, 3)   0                                            
____________________________________________________________________________________________________
zero_padding2d_1 (ZeroPadding2D) (None, 230, 230, 3)   0           input_1[0][0]                    
____________________________________________________________________________________________________
conv1 (Conv2D)                   (None, 112, 112, 64)  9472        zero_padding2d_1[0][0]           
____________________________________________________________________________________________________
bn_conv1 (BatchNormalization)    (None, 112, 112, 64)  256         conv1[0][0]                      
___________________________________________________________________________________________

In [14]:
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

In [15]:
epochs = 1

Run enough steps per epoch to go throught the entire training set

In [16]:
steps_per_epoch = train_batches.samples // train_batches.batch_size
validation_steps = val_batches.samples // val_batches.batch_size
steps_per_epoch

356

In [17]:
(steps_per_epoch * batch_size), train_batches.samples

(22784, 22797)

In [None]:
model.fit_generator(train_batches, validation_data=val_batches, epochs=epochs,
                    steps_per_epoch=steps_per_epoch,validation_steps=validation_steps)

This give us a validation score of around: `val_loss: 0.0686 - val_acc: 0.9752`

## Gen submission file

In [19]:
import submission

In [26]:
test_batches, steps = submission.test_batches()

Found 12500 images belonging to 1 classes.


In [27]:
preds = model.predict_generator(test_batches, steps)

In [28]:
submission.gen_file(preds, test_batches, clip=True)

This gave a score of around `0.095` on the public leaderboard 