In [1]:
import os
# Select which GPU this process may use. Both variables are set before
# `import keras` below, so they are already in the environment when the
# backend is loaded.
# PCI_BUS_ID asks CUDA to number devices in PCI bus order, so the index "1"
# below refers to a stable physical card.
os.environ["CUDA_DEVICE_ORDER"]= "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]= "1"
import numpy as np
from warnings import simplefilter
# Suppress FutureWarning messages; installed before keras is imported so
# warnings raised during that import are filtered as well.
simplefilter(action='ignore', category=FutureWarning)
import keras

Using TensorFlow backend.


## VGG16 Internal Architecture

![](vgg16_model.png)

In [2]:
from keras.applications import VGG16    # VGG16 is already prebuild in Keras

## Load the VGG16 Model

In [3]:
# Instantiate VGG16 with its ImageNet weights and with the original
# classifier head attached (include_top=True). Keras downloads the weight
# file on first use if it is not already present locally.
vgg_model = VGG16(include_top=True, weights='imagenet')

# Print the layer-by-layer architecture with output shapes and parameter counts.
vgg_model.summary()







_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
____

## Load the data

In [3]:
# Read the pre-split input arrays from the working directory; the order of
# the file names matches the order of the target variables.
X_train, X_test, X_val = (
    np.load(path) for path in ('X_train.npy', 'X_test.npy', 'X_val.npy')
)

# Read the matching label arrays the same way.
y_train, y_test, y_val = (
    np.load(path) for path in ('y_train.npy', 'y_test.npy', 'y_val.npy')
)

## Create an output layer

In [4]:
from keras.layers import Dense

In [5]:
# Number of target classes, taken from the size of y_train's second axis.
# NOTE(review): presumably y_train is one-hot encoded as (samples, classes) — confirm.
n_classes = y_train.shape[1]

In [6]:
n_classes

97

In [8]:
# New classification head: one softmax unit per target class.
output = Dense(n_classes, activation='softmax')

## Connecting the output layer to the VGG16 model

In [7]:
from keras.models import Model

In [10]:
# Reuse the VGG16 input tensor and attach the new softmax layer to the
# output of the second-to-last VGG16 layer, so the original final layer
# is left out of the new model.
inp = vgg_model.input
penultimate_layer = vgg_model.layers[-2]
out = output(penultimate_layer.output)
final_model = Model(inputs=inp, outputs=out)

In [11]:
final_model

<keras.engine.training.Model at 0x7fa4c80f8490>

## Freeze the VGG Layers

In [12]:
# Freeze every layer except the last one: only the newly added output
# layer keeps trainable weights.
head_start = len(final_model.layers) - 1
for position, vgg_layer in enumerate(final_model.layers):
    vgg_layer.trainable = position >= head_start

## Compile the model

In [13]:
# Configure training: Adam optimizer, categorical cross-entropy loss, and
# accuracy reported alongside the loss.
final_model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Print the architecture of the assembled model.
final_model.summary()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
_________

## Scale the dataset

In [8]:
# Cast each split to float32 and divide by 255.
# NOTE(review): each name is rebound to its scaled version, so running this
# cell a second time divides the already-scaled arrays by 255 again.
# NOTE(review): presumably the arrays hold 0-255 pixel values — confirm.
# NOTE(review): keras.applications.vgg16 provides `preprocess_input` for the
# ImageNet weights; confirm that plain division by 255 is intended here.
X_train = np.divide(X_train.astype(np.float32), 255.)
X_val = np.divide(X_val.astype(np.float32), 255.)
X_test = np.divide(X_test.astype(np.float32), 255.)

## Train the model

In [15]:
# Train for 10 epochs in mini-batches of 128 samples, evaluating on the
# validation split after every epoch; the returned History object is kept
# in `history`.
history = final_model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    epochs=10,
    batch_size=128,
)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Train on 4346 samples, validate on 931 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## Overfitting!!!!

The model is clearly overfitting. After the same 10 epochs the validation accuracy is almost **69%**, which is far better than the previous model, but the training accuracy is around **79%**. This gap between training and validation accuracy means the model is fitting the training set better than it generalises. So let's add some dropout between the **fully connected (Dense)** layers and train them.

#### First clear the GPU memory (Use only if you use GPU)

In [26]:
# Drop the previous model's graph and backend state so the next model starts
# from a clean session and can reuse the memory held by the old one.
#
# This replaces numba's `cuda.select_device(0); cuda.close()`: closing the
# CUDA context from outside TensorFlow can leave the Keras backend unable to
# use the GPU for the rest of the kernel's life, so the cells below would
# only run after a kernel restart. `clear_session()` resets the backend in
# place and needs no extra dependency (`keras` is imported at the top of the
# notebook).
keras.backend.clear_session()

In [9]:
from keras.layers import Dropout

In [10]:
# Build a fresh ImageNet-pretrained VGG16, including its classifier head.
vgg_model = VGG16(weights='imagenet', include_top=True)

# Keep handles on the third-from-last and second-to-last layers so that
# dropout can be inserted around them when the head is re-wired.
fc1 = vgg_model.layers[-3]
fc2 = vgg_model.layers[-2]

# The softmax `output` layer is created in the next cell together with the
# dropout layers; the duplicate definition that used to live here was
# overwritten before it was ever used and has been removed.









In [11]:
# Dropout layers to place after fc1 and fc2 respectively. In Keras the
# argument is `rate`, the fraction of inputs that is dropped.
# NOTE(review): 0.8 and 0.7 therefore drop most activations — confirm these
# were not meant as keep probabilities.
dropout1 = Dropout(rate=0.8)
dropout2 = Dropout(rate=0.7)

# Softmax classification layer with one unit per target class.
output = Dense(n_classes, activation='softmax')

In [12]:
# Re-wire the classifier head as: fc1 -> dropout1 -> fc2 -> dropout2 -> softmax.
x = dropout1(fc1.output)
# fc2 is the layer object taken from vgg_model; it is called again here on
# the dropout output instead of directly on fc1's output.
x = fc2(x)
x = dropout2(x)
# NOTE(review): `output` is rebound from the Dense layer to the tensor it
# returns, so this cell cannot be re-run without first re-creating the layer.
output = output(x)

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [13]:
# Assemble the dropout variant from the VGG16 input tensor to the new softmax
# output tensor. The Keras 2 keyword names are `inputs` / `outputs`; the
# singular `input` / `output` spellings are legacy names that trigger an API
# update warning.
droput_model = Model(inputs=vgg_model.input, outputs=output)

  """Entry point for launching an IPython kernel.


In [14]:
droput_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
__________

## Freeze the Conv units in VGG Layers

In [15]:
droput_model.layers[:-5]

[<keras.engine.input_layer.InputLayer at 0x7f0b385d9790>,
 <keras.layers.convolutional.Conv2D at 0x7f0ac5c33b90>,
 <keras.layers.convolutional.Conv2D at 0x7f0ac5bc3310>,
 <keras.layers.pooling.MaxPooling2D at 0x7f0ac4d80dd0>,
 <keras.layers.convolutional.Conv2D at 0x7f0ac4d80d90>,
 <keras.layers.convolutional.Conv2D at 0x7f0a8179da10>,
 <keras.layers.pooling.MaxPooling2D at 0x7f0a81765dd0>,
 <keras.layers.convolutional.Conv2D at 0x7f0a81765b50>,
 <keras.layers.convolutional.Conv2D at 0x7f0a81706650>,
 <keras.layers.convolutional.Conv2D at 0x7f0a8171a450>,
 <keras.layers.pooling.MaxPooling2D at 0x7f0a816d1310>,
 <keras.layers.convolutional.Conv2D at 0x7f0a816d1950>,
 <keras.layers.convolutional.Conv2D at 0x7f0a816dead0>,
 <keras.layers.convolutional.Conv2D at 0x7f0a8167df90>,
 <keras.layers.pooling.MaxPooling2D at 0x7f0a8162da10>,
 <keras.layers.convolutional.Conv2D at 0x7f0a8162d710>,
 <keras.layers.convolutional.Conv2D at 0x7f0a81667a10>,
 <keras.layers.convolutional.Conv2D at 0x7f0a8

In [16]:
# Freeze everything except the last five layers of the model; those five
# stay trainable.
head_start = len(droput_model.layers) - 5
for position, model_layer in enumerate(droput_model.layers):
    model_layer.trainable = position >= head_start

In [17]:
# Configure training: Adam optimizer, categorical cross-entropy loss, and
# accuracy reported alongside the loss. Compiling after the trainable flags
# were set means the frozen/trainable split is the one used for training.
droput_model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)




In [18]:
# Train the dropout model for 100 epochs in mini-batches of 128 samples,
# evaluating on the validation split after every epoch. This rebinds
# `history` to the History object of this run.
history = droput_model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    epochs=100,
    batch_size=128,
)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Train on 4346 samples, validate on 931 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100


Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [19]:
# Score the trained model on the held-out test split. The model was compiled
# with metrics=['accuracy'], so evaluate() yields the loss followed by the
# accuracy.
loss, accuracy = droput_model.evaluate(x=X_test, y=y_test)



In [20]:
loss

1.7852234886439573

In [21]:
accuracy

0.5718884120171673