In [2]:
import tensorflow as tf
import tensorflow.keras
from tensorflow.keras import utils,losses
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Flatten, Dense, Dropout, Lambda, Conv2D,MaxPooling2D
from tensorflow.keras.optimizers import RMSprop,Adadelta
from tensorflow.keras import backend as K
from tensorflow.keras.applications import VGG16

import matplotlib.pyplot as plt
import cv2

# Config the matplotlib backend as plotting inline in IPython
%matplotlib inline

In [4]:
# Build the complete VGG16 network (convolutional base plus its original
# classifier head) on an explicit 224x224 RGB input tensor.
new_input = Input(shape=(224, 224, 3))
model = VGG16(input_tensor=new_input, include_top=True)

# Show the shape of the network's final output, then the per-layer table.
print(model.output.shape)
model.summary()

(None, 1000)
Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)

In [4]:
# Stack a new softmax classification head on top of the VGG16 model loaded in
# the previous cell. The head has NUM_CLASSES output units (multi-class), not
# a single binary unit.
#
# NOTE(review): this cell relies on `model` and `new_input` from whichever
# VGG16 cell was executed last. The summary recorded below shows a
# (640, 480, 3) input and a (20, 15, 512) base output, i.e. it was produced
# with a different base model than the 224x224 one built just above.
# Re-run top to bottom to refresh the output.
NUM_CLASSES = 10  # width of the new softmax output layer

# training=False calls the wrapped base model in inference mode.
x = model(new_input, training=False)
# Collapse the base model's output to one flat vector per sample so it can
# feed a Dense layer regardless of the base model's output rank.
x = Flatten()(x)
outputs = Dense(NUM_CLASSES, activation="softmax")(x)
my_vgg16 = Model(new_input, outputs)

In [5]:
# Print the layer table of the stacked model built in the previous cell
# (wrapped VGG16 -> Flatten -> Dense softmax head).
my_vgg16.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 640, 480, 3)]     0         
_________________________________________________________________
vgg16 (Model)                (None, 20, 15, 512)       14714688  
_________________________________________________________________
flatten (Flatten)            (None, 153600)            0         
_________________________________________________________________
dense (Dense)                (None, 10)                1536010   
Total params: 16,250,698
Trainable params: 16,250,698
Non-trainable params: 0
_________________________________________________________________


From: https://towardsdatascience.com/step-by-step-guide-to-using-pretrained-models-in-keras-c9097b647b29
VGG16 is trained on RGB images of size (224, 224), which is the default input size of the network. We can also feed it input images of a different size, but the height and width of the image must be at least 32 pixels. We can only feed other image sizes when we exclude the default classifier from the network. The following example uses an input size of (32, 32, 3). The last dimension, 3, represents the number of color channels.

In [9]:
# load model and specify a new input shape for images
new_input = Input(shape=(32, 32, 3))
model = VGG16(include_top=False, input_tensor=new_input)
#model = VGG16(include_top=True, input_tensor=new_input)
print (model.output.shape)
model.summary()

(None, 1, 1, 512)
Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_8 (InputLayer)         [(None, 32, 32, 3)]       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 32, 32, 64)        1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 32, 32, 64)        36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 16, 16, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 16, 16, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 16, 16, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 8, 8, 1

Pooling

We can apply 2 types of pooling on the final output from Convolution Layers. global average pooling and global maximum pooling.

In global maximum pooling, we select the maximum value over each slice of a tensor, as shown in the following image.
Global Maximum Pooling

Suppose the output tensor from the convolution layers is of shape (7, 7, 512). If we apply global maximum pooling, we select the maximum value from each (7, 7) slice, which gives us a total of 512 numbers. Global average pooling does the same except that it takes the average instead of the maximum.

To use one of these two pooling modes, we set the argument pooling to max or avg. In the following example, I am using global average pooling.

Global pooling is useful when we have input images of variable size. Suppose different image sizes give us output tensors of two different shapes, (3, 3, 512) and (7, 7, 512). Applying global pooling to either of these tensors gives us a fixed-size vector of length 512. So the final output for variable-size images is still a fixed-size vector after applying global pooling.

In [5]:
# load model and specify a new input shape for images
new_input = Input(shape=(32, 32, 3))
model = VGG16(include_top=False, input_tensor=new_input, pooling='avg')
#model = VGG16(include_top=True, input_tensor=new_input)
print (model.output.shape)
model.summary()

(None, 512)
Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         [(None, 32, 32, 3)]       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 32, 32, 64)        1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 32, 32, 64)        36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 16, 16, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 16, 16, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 16, 16, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 8, 8, 128)   

Freezing layers

Before training the network you may want to freeze some of its layers depending upon the task. Once a layer is frozen, its weights are not updated while training.

In the following example, I am freezing the first 10 layers of the network (the ones closest to the input, i.e. model.layers[:10]). I have printed all layers in the network and whether they are trainable or not. We can see that only the first 10 layers are not trainable.
Freezing the first 10 layers

If the current dataset is similar to the dataset these networks were trained on, then it's good to freeze all layers, since images in both datasets would have similar features. But if the dataset is different, then we should freeze only the first layers (closest to the input) and train the later layers, because the first layers extract general features. The more similar the dataset, the more layers we should freeze.

In [6]:
# Build headless, average-pooled VGG16 on a 32x32 RGB input, freeze its first
# layers, and report which layers remain trainable.
new_input = Input(shape=(32, 32, 3))
model = VGG16(include_top=False, input_tensor=new_input, pooling='avg')
print(model.output.shape)
# summary() runs before freezing, so it describes the still-unfrozen model.
model.summary()

# Freeze the first 10 entries of model.layers. The slice starts at the
# InputLayer, so that layer is counted among the 10.
for layer in model.layers[:10]:
    layer.trainable = False

# Print "<layer name> <trainable flag>" in two aligned columns.
# Padding every name to the longest one keeps the flag column straight for
# short names (e.g. the input layer) and long names alike; the previous
# fixed-string slice used a negative index for names shorter than 9
# characters and produced too little padding for them.
name_width = max(len(layer.name) for layer in model.layers)
for layer in model.layers:
    print(layer.name.ljust(name_width), layer.trainable)

(None, 512)
Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_5 (InputLayer)         [(None, 32, 32, 3)]       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 32, 32, 64)        1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 32, 32, 64)        36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 16, 16, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 16, 16, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 16, 16, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 8, 8, 128)   

In [8]:
# load model and specify a new input shape for images
new_input = Input(shape=(32, 32, 3))
#model = VGG16(include_top=True, input_tensor=new_input, pooling='avg')
model = VGG16(include_top=True, input_tensor=new_input)
print (model.output.shape)
model.summary()

ValueError: Shapes (512, 4096) and (25088, 4096) are incompatible