In [1]:
# Learn to use pre-trained models from keras: https://keras.io/api/applications/
# First we check that we can use the pre-trained network from keras to do inference

import tensorflow as tf
import numpy as np

from tensorflow import keras
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from tensorflow import convert_to_tensor

# Use the following line to download the weights to C:\Users\<<username>>\.keras\models
#model = MobileNetV2(weights='imagenet')

# I shifted the weights to my own folder, so they are loaded from an explicit file path
model = MobileNetV2(weights='D:/data/vision_imagenet/mobilenetv2_pre_trained.h5')

# Root directory handed to flow_from_directory below
img_path = 'D:/data/vision_imagenet/test_images/'

def mobilenet_preprocess(img):
    """Apply MobileNetV2's ``preprocess_input`` to ``img`` and return a float32 tensor.

    Used as the ``preprocessing_function`` of the ImageDataGenerator, so it is
    presumably called once per image -- TODO confirm against the Keras docs.
    """
    scaled = preprocess_input(img)
    return convert_to_tensor(scaled, dtype=tf.float32)
    
# Directory-backed batch iterator: images are resized to the 224x224 network
# input, and shuffling is off so the batch order is the same on every run.
test_datagen = ImageDataGenerator(preprocessing_function=mobilenet_preprocess)
flow_options = dict(target_size=(224, 224), batch_size=64, shuffle=False)
test_generator = test_datagen.flow_from_directory(img_path, **flow_options)

# Build the {class index: label} lookup used to turn predictions into names.
# Each non-empty line of the label file is expected to look like
#   939: 'zucchini, courgette',
# (presumably the braces of the original dict literal were removed -- TODO confirm).
imagenet_classes = {}
with open("D:/data/vision_imagenet/imagenet1000_clsidx_to_labels.txt") as f:
    for line in f:
        # Same normalisation as before: drop spaces, the newline and single quotes.
        line = line.replace(" ", "").replace("\n", "").replace("'", "")
        if not line:
            # A blank line (e.g. a trailing newline at EOF) carries no entry;
            # unpacking its split result would raise ValueError.
            continue
        # Split on the first colon only so a label that itself contains ':'
        # stays intact instead of breaking the two-value unpacking.
        key, val = line.split(":", 1)
        imagenet_classes[int(key)] = val

# Inference on the first batch from the generator (the labels are discarded);
# argmax over axis 1 picks the highest-scoring class index for each image.
img, _ = test_generator.next()
preds = np.argmax(model(img), axis=1)

# Print the predicted class names.
# Interesting: the VGG (pytorch) model classified the first image as a cucumber,
# while MobileNet classified it as a zucchini.
for class_index in preds:
    print(imagenet_classes[class_index])

# Keep the pre-trained parameters as a flat list of numpy arrays
weights = model.get_weights()

Found 4 images belonging to 1 classes.
zucchini,courgette,
redfox,Vulpesvulpes,
fryingpan,frypan,skillet,
anemonefish,


In [2]:
# List every layer name of the MobileNet V2 model, in model.layers order
for layer in model.layers:
    print(layer.name)

input_1
Conv1
bn_Conv1
Conv1_relu
expanded_conv_depthwise
expanded_conv_depthwise_BN
expanded_conv_depthwise_relu
expanded_conv_project
expanded_conv_project_BN
block_1_expand
block_1_expand_BN
block_1_expand_relu
block_1_pad
block_1_depthwise
block_1_depthwise_BN
block_1_depthwise_relu
block_1_project
block_1_project_BN
block_2_expand
block_2_expand_BN
block_2_expand_relu
block_2_depthwise
block_2_depthwise_BN
block_2_depthwise_relu
block_2_project
block_2_project_BN
block_2_add
block_3_expand
block_3_expand_BN
block_3_expand_relu
block_3_pad
block_3_depthwise
block_3_depthwise_BN
block_3_depthwise_relu
block_3_project
block_3_project_BN
block_4_expand
block_4_expand_BN
block_4_expand_relu
block_4_depthwise
block_4_depthwise_BN
block_4_depthwise_relu
block_4_project
block_4_project_BN
block_4_add
block_5_expand
block_5_expand_BN
block_5_expand_relu
block_5_depthwise
block_5_depthwise_BN
block_5_depthwise_relu
block_5_project
block_5_project_BN
block_5_add
block_6_expand
block_6_expand

In [3]:
# Implementing MobileNet V2 to check if I understand the concepts
#
# We will:
# 1) Implement the model structure of MobileNet V2 until the 2nd bottleneck layer within the paper
# 2) Load the keras pre-trained weights into the model
# 3) Check that the outputs from our custom implementation are the same as the outputs from the keras model
# We stop at the 2nd bottleneck layer because it contains the first residual connection;
# the rest of the model repeats the patterns implemented up to that point
#
# Mobilenet V2 explanation: https://www.youtube.com/watch?v=eZzr780Qxfg&list=PLLCGSi_WZBNftPaTaX4k4AwLp4VreDAwV&index=10
#
# Print intermediate output to check against my model implementation

from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input

# Sub-model that reuses the pre-trained layers but stops at 'block_2_add',
# the point up to which the custom implementation is written.
model_intermediate = Model(inputs=model.input, outputs=model.get_layer('block_2_add').output)
img_path = "D:/data/vision_imagenet/test_images/0/fox.jpg"

# Load a single image, add a leading batch axis and apply the MobileNetV2
# input scaling before converting to a tensor.
img = image.load_img(img_path, target_size=(224, 224))
img = image.img_to_array(img)
img = np.expand_dims(img, axis=0)
img = preprocess_input(img)
img = tf.convert_to_tensor(img)

intermediate_output = model_intermediate(img)

# summary() prints the table itself and returns None, so wrapping it in
# print() only added a stray "None" line to the output.
model_intermediate.summary()
print(intermediate_output)

# The following lines were used to print a visualisation from the keras model for reference
# Very helpful in understanding the model architecture
#from keras.utils.vis_utils import plot_model
#plot_model(model, show_shapes=True, show_layer_names=True)

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 Conv1 (Conv2D)                 (None, 112, 112, 32  864         ['input_1[0][0]']                
                                )                                                                 
                                                                                                  
 bn_Conv1 (BatchNormalization)  (None, 112, 112, 32  128         ['Conv1[0][0]']                  
                                )                                                             

In [17]:
# Our custom implementation of the MobileNet V2 architecture

from tensorflow.keras import datasets, layers, models
from tensorflow.math import add

# Custom re-implementation of MobileNet V2 up to the keras layer 'block_2_add'.
#
# All convolutions are created with use_bias=False because each one is
# followed directly by batch normalisation.
# The model uses ReLU6 (max_value=6), which is more efficient when using low
# precision computations.


def _inverted_residual(x, input_channels, output_channels, expansion_factor, stride):
    """Append one MobileNet V2 bottleneck block to ``x`` and return its output.

    Layers are created in this order:
      1. 1x1 Conv2D expansion to ``input_channels * expansion_factor`` channels,
         then BatchNormalization and ReLU6 (skipped when ``expansion_factor`` is 1)
      2. 3x3 DepthwiseConv2D with the given ``stride``, then BatchNormalization
         and ReLU6
      3. 1x1 Conv2D projection to ``output_channels``, then BatchNormalization
         (no activation)
      4. an Add layer joining the block input and output, only when
         ``stride == 1`` and ``input_channels == output_channels``

    Parameters
    ----------
    x : symbolic Keras tensor the block is applied to.
    input_channels : channel count of ``x``.
    output_channels : channel count produced by the projection.
    expansion_factor : multiplier ``t`` for the expansion convolution.
    stride : stride of the depthwise convolution.
    """
    y = x

    if expansion_factor != 1:
        # Expansion layers are 1x1 regular conv2d
        y = layers.Conv2D(input_channels * expansion_factor, (1, 1), padding='same', use_bias=False)(y)
        y = layers.BatchNormalization()(y)
        y = layers.ReLU(max_value=6)(y)

    # Depthwise 3x3 conv: one filter per input channel
    y = layers.DepthwiseConv2D(kernel_size=(3, 3), strides=(stride, stride), padding='same', use_bias=False)(y)
    y = layers.BatchNormalization()(y)
    y = layers.ReLU(max_value=6)(y)

    # The 1x1 conv (cheaper than 3x3) combines information across all the channels
    y = layers.Conv2D(output_channels, (1, 1), use_bias=False)(y)
    y = layers.BatchNormalization()(y)

    if stride == 1 and input_channels == output_channels:
        # Residual connection. A Keras merge layer is used instead of
        # tensorflow.math.add so the graph is built only from Keras layers.
        y = layers.Add()([x, y])

    return y


# This series of layers is called "Conv1" in the keras implementation
# It is the conv2d layer in the paper. output channels = 32, stride = 2
x_input = layers.Input((224, 224, 3))
x = layers.Conv2D(32, (3, 3), strides=(2, 2), padding='same', use_bias=False)(x_input)
x = layers.BatchNormalization()(x)
x = layers.ReLU(max_value=6)(x)

# "expanded" in the keras implementation: the first bottleneck layer in the paper.
# No expansion (t = 1), output channels = 16, stride = 1.
x = _inverted_residual(x, input_channels=32, output_channels=16, expansion_factor=1, stride=1)

# "block_1" in the keras implementation: the second bottleneck layer in the paper.
# Expansion factor t = 6, output channels = 24, stride = 2.
x = _inverted_residual(x, input_channels=16, output_channels=24, expansion_factor=6, stride=2)

# "block_2" in the keras implementation: the repeat (n=2) of the second bottleneck layer.
# Expansion factor t = 6, output channels = 24, stride = 1 -> first residual connection.
x = _inverted_residual(x, input_channels=24, output_channels=24, expansion_factor=6, stride=1)

# Compiling the model
# NOTE(review): calling the model directly on a tensor does not need compile();
# the call is kept so the model ends up in the same state as before.
model_custom = models.Model(inputs=x_input, outputs=x)

model_custom.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'])

model_custom.summary()

# Setting the weights from the pre-trained model into the custom model.
#
# `weights` is the flat list returned by model.get_weights() on the keras model.
# The custom model stacks the same parameterised layers in the same order, so
# each layer that owns variables simply receives the next len(layer.weights)
# arrays from that list:
#   - Conv2D / DepthwiseConv2D (use_bias=False): 1 array (the kernel)
#   - BatchNormalization: 4 arrays
#   - input, ReLU and add layers: no weights, skipped
weight_cursor = 0
for layer in model_custom.layers:
    n_arrays = len(layer.weights)
    if n_arrays == 0:
        continue
    layer.set_weights(weights[weight_cursor:weight_cursor + n_arrays])
    weight_cursor += n_arrays

# 9 convolutions x 1 array + 9 batch norms x 4 arrays = weights[0] .. weights[44]
assert weight_cursor == 45, f"expected to copy 45 weight arrays, copied {weight_cursor}"

# Check that the custom implementation reproduces the keras model.
# The reference is recomputed here (rather than read from an earlier cell's
# variable) so the comparison is still meaningful when this cell is re-run.
reference_output = model_intermediate(img)
intermediate_output = model_custom(img)

# Raises AssertionError if the two feature maps differ beyond float32 noise.
np.testing.assert_allclose(
    np.asarray(intermediate_output),
    np.asarray(reference_output),
    rtol=1e-4,
    atol=1e-4,
)
print(intermediate_output)

Model: "model_7"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_7 (InputLayer)           [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv2d_24 (Conv2D)             (None, 112, 112, 32  864         ['input_7[0][0]']                
                                )                                                                 
                                                                                                  
 batch_normalization_36 (BatchN  (None, 112, 112, 32  128        ['conv2d_24[0][0]']              
 ormalization)                  )                                                           