In [1]:
import numpy as np, pandas as pd, matplotlib.pyplot as plt
import keras
from keras.layers import Input
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, precision_score, recall_score, classification_report, confusion_matrix
from keras import models, optimizers, Sequential, regularizers, layers
from keras.models import load_model, model_from_json, Model
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from keras.utils import to_categorical
from keras.applications import VGG16
from keras.layers import Flatten, Dropout, Reshape, Dense, GlobalAveragePooling2D
from keras.layers.convolutional import Conv2D, MaxPooling2D
import itertools
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, TensorBoard, EarlyStopping
from keras import applications  
from keras import backend as k

np.random.seed(123)

Using TensorFlow backend.


In [2]:
target_size=(224, 224)
seed = 123
batch_size = 64
epochs = 50
directory = 'data/train_images/'

train_datagen = ImageDataGenerator(rescale=1./255,
    shear_range=0.2,
    horizontal_flip=True,
    validation_split=0.2,
    fill_mode = "nearest",
    zoom_range = 0.2,
    width_shift_range = 0.2,
    height_shift_range=0.2,
    rotation_range=20) # set validation split

train_generator = train_datagen.flow_from_directory(
    directory,
    target_size=target_size,
    batch_size=batch_size,
    seed=seed,
    class_mode="categorical",
    subset='training') # set as training data

validation_generator = train_datagen.flow_from_directory(
    directory, # same directory as training data
    target_size=target_size,
    batch_size=batch_size,
    seed=seed,
    class_mode="categorical",
    subset='validation') # set as validation data

Found 2931 images belonging to 5 classes.
Found 731 images belonging to 5 classes.


[Link to code below](https://riptutorial.com/keras/example/32608/transfer-learning-using-keras-and-vgg)

In [None]:
vgg_model = applications.VGG16(weights='imagenet',
                               include_top=False,
                               input_shape=(224, 224, 3))

# Creating dictionary that maps layer names to the layers
layer_dict = dict([(layer.name, layer) for layer in vgg_model.layers])

# Getting output tensor of the last VGG layer that we want to include
x = layer_dict['block2_pool'].output

# Stacking a new simple convolutional network on top of it    
x = Conv2D(filters=64, kernel_size=(3, 3), activation='relu')(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = Flatten()(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)
x = Dense(5, activation='softmax')(x)

# Creating new model. Please note that this is NOT a Sequential() model.
from keras.models import Model
custom_model = Model(inputs=vgg_model.input, outputs=x)

# Make sure that the pre-trained bottom layers are not trainable
for layer in custom_model.layers[:7]:
    layer.trainable = False

In [None]:
custom_model.compile(loss='categorical_crossentropy',
                     optimizer='adam',
                     metrics=['accuracy'])

It's possible that the errors I've been running into with the other transfer learning setups are arising from the line containing: Model(inputs=,outputs=).
I've been leaving them as input and output. The first time I tried fitting this model, the training accuracy dropped throughout the first epoch. I changed input and output to plural, reran every cell, and the training accuracy steadily improved throughout epoch 1, which just finished.

The above is probably true, but it doesn't fix the problem of the model only predicting the most common class (0)

In [None]:
checkpoint = ModelCheckpoint("VGG16_1.h5", monitor='val_acc', verbose=1, save_best_only=True, save_weights_only=False, mode='auto', period=1)
early = EarlyStopping(monitor='val_acc', min_delta=0, patience=10, verbose=1, mode='auto')

In [None]:
# model.compile(optimizer=optimizers.RMSprop(lr=2e-4),
#               loss='categorical_crossentropy',
#               metrics=['acc'])
 
# history = model.fit(train_features,
#                     train_labels,
#                     epochs=20,
#                     batch_size=batch_size,
#                     validation_data=(validation_features,validation_labels))

Possible Explanations for the garbage overpredictions:
* The data needs better preprocessing to be useful
* The imagenet weights are not appropriate for this problem space
* Class weights need to be added in order to correct for the overpredictions
* Over or undersampling needs to be implemented in order to correct for the overpredictions

In [None]:
model_hist = custom_model.fit_generator(
    train_generator,
    steps_per_epoch = train_generator.samples // batch_size,
    validation_data = validation_generator, 
    validation_steps = validation_generator.samples // batch_size,
    epochs = epochs,
    callbacks = [checkpoint, early])

In [None]:
custom_model.summary()

In [None]:
vgg_model.summary()

In [None]:
for i, layer in enumerate(custom_model.layers):
    print(i, layer.name, layer.trainable)

val_acc = 0.51124

## 2

In [None]:
layer_dict

In [None]:
for i, layer in enumerate(vgg_model.layers):
    print(i, layer.name, layer.trainable)

In [None]:
vgg_model.summary()

In [4]:
vgg_model = applications.VGG16(weights='imagenet',
                               include_top=False,
                               input_shape=(224, 224, 3))

# Creating dictionary that maps layer names to the layers
layer_dict = dict([(layer.name, layer) for layer in vgg_model.layers])

# Getting output tensor of the last VGG layer that we want to include
y = layer_dict['block5_pool'].output

# Stacking a new simple convolutional network on top of it    
# y = Conv2D(filters=64, kernel_size=(3, 3), activation='relu')(y)
# y = MayPooling2D(pool_size=(2, 2))(y)
y = Flatten()(y)
y = Dense(512, activation='relu')(y)
y = Dropout(0.5)(y)
y = Dense(256, activation='relu')(y)
y = Dense(5, activation='softmax')(y)

custom_model2 = Model(inputs=vgg_model.input, outputs=y)

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [5]:
custom_model2.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
__________

In [6]:
for i, layer in enumerate(custom_model2.layers):
    print(i, layer.name, layer.trainable)

0 input_2 False
1 block1_conv1 True
2 block1_conv2 True
3 block1_pool True
4 block2_conv1 True
5 block2_conv2 True
6 block2_pool True
7 block3_conv1 True
8 block3_conv2 True
9 block3_conv3 True
10 block3_pool True
11 block4_conv1 True
12 block4_conv2 True
13 block4_conv3 True
14 block4_pool True
15 block5_conv1 True
16 block5_conv2 True
17 block5_conv3 True
18 block5_pool True
19 flatten_1 True
20 dense_1 True
21 dropout_1 True
22 dense_2 True
23 dense_3 True


In [7]:
# Make sure that the pre-trained bottom layers are not trainable
for layer in custom_model2.layers[:19]:
    layer.trainable = False

In [8]:
for i, layer in enumerate(custom_model2.layers):
    print(i, layer.name, layer.trainable)

0 input_2 False
1 block1_conv1 False
2 block1_conv2 False
3 block1_pool False
4 block2_conv1 False
5 block2_conv2 False
6 block2_pool False
7 block3_conv1 False
8 block3_conv2 False
9 block3_conv3 False
10 block3_pool False
11 block4_conv1 False
12 block4_conv2 False
13 block4_conv3 False
14 block4_pool False
15 block5_conv1 False
16 block5_conv2 False
17 block5_conv3 False
18 block5_pool False
19 flatten_1 True
20 dense_1 True
21 dropout_1 True
22 dense_2 True
23 dense_3 True


In [None]:
custom_model2.compile(loss='categorical_crossentropy',
                     optimizer='adam',
                     metrics=['accuracy'])

In [None]:
checkpoint = ModelCheckpoint("VGG16_2.h5", monitor='val_acc', verbose=1, save_best_only=True, save_weights_only=False, mode='auto', period=1)
early = EarlyStopping(monitor='val_acc', min_delta=0, patience=10, verbose=1, mode='auto')


This model includes all VGG16 layers besides the top. Additionally, an extra FC layer has been added and the first FC layer has been changed from 256 to 512 nodes, with the second FC layer having 256. This model does not seem to be experiencing the same problems as the previous VGG16 model. Namely, it is not only predicting the most common class (0). This could be due to either utilizing all of the VGG16 layers instead of just the first 7 or it could be due to the increased complexity in the FC layers.

In [None]:
model_hist2 = custom_model2.fit_generator(
    train_generator,
    steps_per_epoch = train_generator.samples // batch_size,
    validation_data = validation_generator, 
    validation_steps = validation_generator.samples // batch_size,
    epochs = epochs,
    callbacks = [checkpoint, early])

val_acc = 0.71814

## 3

In [None]:
vgg_model = applications.VGG16(weights='imagenet',
                               include_top=False,
                               input_shape=(224, 224, 3))

# Creating dictionary that maps layer names to the layers
layer_dict = dict([(layer.name, layer) for layer in vgg_model.layers])

# Getting output tensor of the last VGG layer that we want to include
z = layer_dict['block5_pool'].output

# Stacking a new simple convolutional network on top of it    
# z = Conv2D(filters=64, kernel_size=(3, 3), activation='relu')(z)
# z = MayPooling2D(pool_size=(2, 2))(z)
z = Flatten()(z)
z = Dense(512, activation='relu')(z)
z = Dropout(0.5)(z)
z = Dense(512, activation='relu')(z)
z = Dense(5, activation='softmax')(z)

custom_model3 = Model(inputs=vgg_model.input, outputs=z)

Accidentally saved this model as custom_model2 so that variable is overwritten

In [None]:
custom_model3.summary()

In [None]:
for i, layer in enumerate(custom_model3.layers):
    print(i, layer.name, layer.trainable)

In [None]:
# Make sure that the pre-trained bottom layers are not trainable
for layer in custom_model3.layers[:19]:
    layer.trainable = False

In [None]:
for i, layer in enumerate(custom_model3.layers):
    print(i, layer.name, layer.trainable)

In [None]:
custom_model3.compile(loss='categorical_crossentropy',
                     optimizer='adam',
                     metrics=['accuracy'])

In [None]:
checkpoint = ModelCheckpoint("VGG16_3.h5", monitor='val_acc', verbose=1, save_best_only=True, save_weights_only=False, mode='auto', period=1)
early = EarlyStopping(monitor='val_acc', min_delta=0, patience=10, verbose=1, mode='auto')
tensorboard = TensorBoard()


Version 3: This model is the same as custom_model2 except the FC layers have doubled in nodes to see added complexity's effect on accuracy.

Version 3.1: The original version 3 levelled off at 29% accuracy. I reran it with 512 nodes in both FC layers instead of the 1024 and 512 nodes used in the original version 3.

In [None]:
model_hist3 = custom_model3.fit_generator(
    train_generator,
    steps_per_epoch = train_generator.samples // batch_size,
    validation_data = validation_generator, 
    validation_steps = validation_generator.samples // batch_size,
    epochs = epochs,
    callbacks = [checkpoint, early, tensorboard])

val_acc = 0.71964

## 4

In [None]:
vgg_model = applications.VGG16(weights='imagenet',
                               include_top=False,
                               input_shape=(224, 224, 3))

# Creating dictionary that maps layer names to the layers
layer_dict = dict([(layer.name, layer) for layer in vgg_model.layers])

# Getting output tensor of the last VGG layer that we want to include
x1 = layer_dict['block4_pool'].output

# Stacking a new simple convolutional network on top of it    
# x1 = Conv2D(filters=64, kernel_size=(3, 3), activation='relu')(x1)
# x1 = MayPooling2D(pool_size=(2, 2))(x1)
x1 = Flatten()(x1)
x1 = Dense(128, activation='relu')(x1)
x1 = Dropout(0.5)(x1)
x1 = Dense(128, activation='relu')(x1)
x1 = Dense(5, activation='softmax')(x1)

custom_model4 = Model(inputs=vgg_model.input, outputs=x1)

In [11]:
custom_model4.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
__________

In [12]:
for i, layer in enumerate(custom_model4.layers):
    print(i, layer.name, layer.trainable)

0 input_1 False
1 block1_conv1 False
2 block1_conv2 False
3 block1_pool False
4 block2_conv1 False
5 block2_conv2 False
6 block2_pool False
7 block3_conv1 False
8 block3_conv2 False
9 block3_conv3 False
10 block3_pool False
11 block4_conv1 False
12 block4_conv2 False
13 block4_conv3 False
14 block4_pool False
15 flatten_1 True
16 dense_1 True
17 dropout_1 True
18 dense_2 True
19 dense_3 True


In [13]:
# Make sure that the pre-trained bottom layers are not trainable
for layer in custom_model4.layers[:15]:
    layer.trainable = False

In [14]:
for i, layer in enumerate(custom_model4.layers):
    print(i, layer.name, layer.trainable)

0 input_1 False
1 block1_conv1 False
2 block1_conv2 False
3 block1_pool False
4 block2_conv1 False
5 block2_conv2 False
6 block2_pool False
7 block3_conv1 False
8 block3_conv2 False
9 block3_conv3 False
10 block3_pool False
11 block4_conv1 False
12 block4_conv2 False
13 block4_conv3 False
14 block4_pool False
15 flatten_1 True
16 dense_1 True
17 dropout_1 True
18 dense_2 True
19 dense_3 True


In [15]:
custom_model4.compile(loss='categorical_crossentropy',
                     optimizer='adam',
                     metrics=['accuracy'])

In [16]:
checkpoint = ModelCheckpoint("VGG16_4.h5", monitor='val_acc', verbose=1, save_best_only=True, save_weights_only=False, mode='auto', period=1)
early = EarlyStopping(monitor='val_acc', min_delta=0, patience=10, verbose=1, mode='auto')
tensorboard = TensorBoard()


Version 4: This model is the same as version 3 except block 5 has been removed and FC complexity has been reduced.

In [17]:
model_hist4 = custom_model4.fit_generator(
    train_generator,
    steps_per_epoch = train_generator.samples // batch_size,
    validation_data = validation_generator, 
    validation_steps = validation_generator.samples // batch_size,
    epochs = epochs,
    callbacks = [checkpoint, early, tensorboard])

Epoch 1/50

Epoch 00001: val_acc improved from -inf to 0.49025, saving model to VGG16_4.h5
Epoch 2/50

Epoch 00002: val_acc improved from 0.49025 to 0.49925, saving model to VGG16_4.h5
Epoch 3/50

Epoch 00003: val_acc did not improve from 0.49925
Epoch 4/50

Epoch 00004: val_acc did not improve from 0.49925
Epoch 5/50

Epoch 00005: val_acc improved from 0.49925 to 0.50975, saving model to VGG16_4.h5
Epoch 6/50

Epoch 00006: val_acc did not improve from 0.50975
Epoch 7/50

Epoch 00007: val_acc did not improve from 0.50975
Epoch 8/50

Epoch 00008: val_acc did not improve from 0.50975
Epoch 9/50

Epoch 00009: val_acc did not improve from 0.50975
Epoch 10/50

Epoch 00010: val_acc did not improve from 0.50975
Epoch 11/50

Epoch 00011: val_acc did not improve from 0.50975
Epoch 12/50

Epoch 00012: val_acc did not improve from 0.50975
Epoch 13/50

Epoch 00013: val_acc did not improve from 0.50975
Epoch 14/50

Epoch 00014: val_acc did not improve from 0.50975
Epoch 15/50

Epoch 00015: val_acc 