# CNN model
[VGG16](https://neurohive.io/en/popular-networks/vgg16/)

In [4]:
import numpy as np 
import pandas as pd 
import os
from glob import glob
%matplotlib inline
import matplotlib.pyplot as plt
import tensorflow as tf
from skimage import io

from keras.preprocessing.image import ImageDataGenerator
from keras.layers import GlobalAveragePooling2D, Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from keras.models import Sequential, Model
from keras.applications.vgg16 import VGG16
from keras.applications.resnet import ResNet50 
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping, ReduceLROnPlateau

# Read the two image manifests into DataFrames. Later cells look up the
# 'img_path' and 'class' columns in both frames; the validation frame is
# sourced from 'test.csv'.
train_df, valid_df = (pd.read_csv(csv_path) for csv_path in ('train.csv', 'test.csv'))

### Image augmentation

In [3]:
IMG_SIZE = (224, 224)   # matches the (224, 224, 3) input of the VGG16 model loaded below
TRAIN_BATCH_SIZE = 9

# Arguments common to the training and validation flows: with directory=None
# the 'img_path' column must hold complete file paths, and 'class' holds the
# two-class label that class_mode='binary' encodes as 0/1.
common_flow_args = dict(directory=None,
                        x_col='img_path',
                        y_col='class',
                        class_mode='binary',
                        target_size=IMG_SIZE)

# NOTE(review): pixels are only rescaled to [0, 1] here. The Keras docs state
# that the VGG16 ImageNet weights expect `vgg16.preprocess_input`
# (RGB->BGR + per-channel mean centring) - consider switching and re-checking
# the results.
train_idg = ImageDataGenerator(rescale=1. / 255.0,
                               horizontal_flip=True,
                               vertical_flip=False,
                               height_shift_range=0.1,
                               width_shift_range=0.1,
                               rotation_range=20,
                               shear_range=0.1,
                               zoom_range=0.1)

train_gen = train_idg.flow_from_dataframe(dataframe=train_df,
                                          batch_size=TRAIN_BATCH_SIZE,
                                          **common_flow_args)

# the validation data should not be augmented
val_idg = ImageDataGenerator(rescale=1. / 255.0)

# One batch must contain the whole validation set (a later cell pulls exactly
# one batch from this generator), so derive the batch size from the dataframe
# instead of hard-coding the current image count. Shuffling is disabled so
# the validation batch is in a reproducible order.
val_gen = val_idg.flow_from_dataframe(dataframe=valid_df,
                                      batch_size=max(len(valid_df), 1),
                                      shuffle=False,
                                      **common_flow_args)

Found 20 validated image filenames belonging to 2 classes.
Found 6 validated image filenames belonging to 2 classes.


In [6]:
## Pull a single large batch of validation data for testing after each epoch.
## The builtin next() is used instead of the iterator's `.next()` method so the
## cell does not depend on that Keras-version-specific method being present.
testX, testY = next(val_gen)

## Load in VGG16 with pre-trained ImageNet weights: 

In [7]:
# Instantiate the complete VGG16 network (classifier head included) with the
# pre-trained ImageNet weights, then print its layer-by-layer summary.
model = VGG16(weights='imagenet',
              include_top=True)
model.summary()


Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0    

In [8]:
# Cut VGG16 off after its last pooling layer: the new model maps the original
# input to the output of 'block5_pool', dropping the layers that follow it.
transfer_layer = model.get_layer('block5_pool')
vgg_model = Model(inputs=model.input, outputs=transfer_layer.output)

## Choose which VGG16 layers to fine-tune: freeze the first 17 layers
## (indices 0-16, i.e. up to and including block5_conv2) so that only the
## last convolutional layer, block5_conv3, stays trainable.
for frozen_layer in vgg_model.layers[:17]:
    frozen_layer.trainable = False

# Print every layer's name next to its trainable flag to verify the freeze.
for layer in vgg_model.layers:
    print(layer.name, layer.trainable)

input_1 False
block1_conv1 False
block1_conv2 False
block1_pool False
block2_conv1 False
block2_conv2 False
block2_pool False
block3_conv1 False
block3_conv2 False
block3_conv3 False
block3_pool False
block4_conv1 False
block4_conv2 False
block4_conv3 False
block4_pool False
block5_conv1 False
block5_conv2 False
block5_conv3 True
block5_pool True


In [9]:
# Baseline experiment: VGG16 features -> Flatten -> single sigmoid unit.
new_model = Sequential()

# Add the convolutional part of the VGG16 model
new_model.add(vgg_model)

# Flatten the output of the VGG16 model because it is from convolutional layer.
new_model.add(Flatten())

# Output layer: one sigmoid unit, so the prediction lies in [0, 1] and pairs
# with the binary cross-entropy loss below.
new_model.add(Dense(1, activation='sigmoid'))

## Set our optimizer, loss function, and learning rate
# `learning_rate` is the current argument name; `lr` is a deprecated alias
# that newer Keras releases reject.
optimizer = Adam(learning_rate=1e-4)
loss = 'binary_crossentropy'
metrics = ['binary_accuracy']

new_model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

## Short 5-epoch run to check that the pipeline trains end to end.
## The fixed validation batch (testX, testY) is evaluated after every epoch.
new_model.fit_generator(train_gen,
                        validation_data=(testX, testY),
                        epochs=5)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.callbacks.History at 0x28200128e88>

## Add more dense layers

In [10]:
# Second experiment: same VGG16 base, followed by two hidden dense layers
# (1024 and 512 units) before the sigmoid output.
# NOTE(review): `vgg_model` is the same object used by the previous
# experiment, so its trainable block5_conv3 weights carry over whatever
# fine-tuning that run applied; rebuild the base per experiment if the runs
# are meant to be compared independently.
new_model = Sequential()

new_model.add(vgg_model)

new_model.add(Flatten())

new_model.add(Dense(1024, activation='relu'))

new_model.add(Dense(512, activation='relu'))

new_model.add(Dense(1, activation='sigmoid'))

# Compile with a fresh Adam instance rather than the `optimizer` object that
# was already compiled into the previous model: an optimizer keeps internal
# state (step counter, per-variable moments) tied to the model it trained,
# and newer Keras versions refuse to reuse one across models.
new_model.compile(optimizer=Adam(learning_rate=1e-4), loss=loss, metrics=metrics)

# Train for 5 epochs, evaluating the fixed validation batch after each epoch.
new_model.fit_generator(train_gen,
                        validation_data=(testX, testY),
                        epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.callbacks.History at 0x282003c46c8>

### Add dropout and another fully connected layer

In [11]:
# Third experiment: VGG16 base followed by a deeper dense head
# (1024 -> 512 -> 256 units) with Dropout(0.5) before each of the first two
# dense layers and before the 256-unit layer.
# NOTE(review): `vgg_model` is shared with the earlier experiments, so its
# trainable block5_conv3 weights carry over their fine-tuning; rebuild the
# base per experiment if the runs are meant to be compared independently.
new_model = Sequential()
new_model.add(vgg_model)
new_model.add(Flatten())

# Add a dropout-layer which may prevent overfitting
new_model.add(Dropout(0.5))

# Add a dense (aka. fully-connected) layer
new_model.add(Dense(1024, activation='relu'))

# Add a dropout-layer
new_model.add(Dropout(0.5))

# Add a dense (aka. fully-connected) layer.
new_model.add(Dense(512, activation='relu'))

# Add a dropout-layer
new_model.add(Dropout(0.5))

# Add a dense (aka. fully-connected) layer
new_model.add(Dense(256, activation='relu'))

# Output layer: a single sigmoid unit,
# so output of the last layer is in the range of [0,1]
new_model.add(Dense(1, activation='sigmoid'))

# Compile with a fresh Adam instance rather than the `optimizer` object that
# was already compiled into an earlier model: an optimizer keeps internal
# state (step counter, per-variable moments) tied to the model it trained,
# and newer Keras versions refuse to reuse one across models.
new_model.compile(optimizer=Adam(learning_rate=1e-4), loss=loss, metrics=metrics)

## Train for 5 epochs, evaluating the fixed validation batch after each epoch
new_model.fit_generator(train_gen,
                        validation_data=(testX, testY),
                        epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.callbacks.History at 0x28201db6bc8>

In [None]:
The last architecture seemed to train more stably than the second, likely because of the added Dropout layers.