# Computer Vision: Using a Pre-Trained Model

This classification is based on the **Plant Seedlings Dataset**, which contains images of approximately 960 unique plants belonging to 12 species at several growth stages, with a resolution of about 10 pixels per mm of annotated RGB images.

The dataset includes the following species:


|English     |Latin               |EPPO|
|:-----------|:-------------------|:---|
|Maize       |Zea mays L.         |ZEAMX|
|Common wheat|Triticum aestivum L.|TRZAX|
|Sugar beet|Beta vulgaris var. altissima|BEAVA|
|Scentless Mayweed|Matricaria perforata Mérat|MATIN|
|Common Chickweed|Stellaria media|STEME|
|Shepherd’s Purse|Capsella bursa-pastoris|CAPBP|
|Cleavers|Galium aparine L.|GALAP|
|Charlock|Sinapis arvensis L.|SINAR|
|Fat Hen|Chenopodium album L.|CHEAL|
|Small-flowered Cranesbill|Geranium pusillum|GERSS|
|Black-grass|Alopecurus myosuroides|ALOMY|
|Loose Silky-bent|Apera spica-venti|APESV|

In [1]:
%matplotlib inline
%load_ext watermark
%watermark -v -m -p numpy,pandas,sklearn,tensorflow -g

import re
import os
import sys
from time import time
import pickle
import pathlib
import itertools
import numpy as np
import pandas as pd
import swifter
from mpl_toolkits.axes_grid1 import ImageGrid
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns
import watermark

# from IPython.core.interactiveshell import InteractiveShell
# InteractiveShell.ast_node_interactivity = "all"

np.random.seed(42)

CPython 3.7.3
IPython 7.8.0

numpy 1.17.3
pandas 0.25.1
sklearn 0.21.3
tensorflow 2.0.0

compiler   : Clang 4.0.1 (tags/RELEASE_401/final)
system     : Darwin
release    : 19.0.0
machine    : x86_64
processor  : i386
CPU cores  : 16
interpreter: 64bit
Git hash   : c6b9079d9be47f8dbb798dbc93d7f425d1c0a382


In [2]:
# Libraries specific of Deep learning and images
import imageio
from skimage.transform import resize as imresize
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, confusion_matrix, classification_report, accuracy_score, f1_score

import tensorflow as tf2
from tensorflow.keras.utils import plot_model, to_categorical
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ReduceLROnPlateau

from tensorflow.keras import applications
from tensorflow.keras.layers import Input, InputLayer, Activation, Maximum, ZeroPadding2D, concatenate, BatchNormalization
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras import regularizers
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler

In [3]:
sys.path.append(os.pardir)
from src.log_manager import Logger
from src.text_func import list_abbreviations
from src.cv_tools import equalizer_augmentation
# from src.cv_models import inception_resnet_v2

## 1. Loading data

### 1.1 Using previous notebook dataset

In [4]:
# Define logger
logger = Logger("CNN", "cnn_logs.txt", file_path=os.getcwd(), level="INFO")

# Definitions
PLANT_CLASSES = ['Black-grass', 'Charlock', 'Cleavers', 'Common Chickweed', 'Common wheat', 
                 'Fat Hen', 'Loose Silky-bent', 'Maize', 'Scentless Mayweed', 
                 'Shepherds Purse', 'Small-flowered Cranesbill', 'Sugar beet']
CLASSES_DICT_NAMES = {name: k for k, name in zip(range(len(PLANT_CLASSES)), PLANT_CLASSES)}
CLASSES_DICT_NUM = {k: name for k, name in zip(range(len(PLANT_CLASSES)), PLANT_CLASSES)}
NUM_CATEGORIES = len(PLANT_CLASSES)
ABBREVIATIONS = list_abbreviations(PLANT_CLASSES)
RESHAPE_SIZE = (224, 224, 3)

In [5]:
IMAGES = "./data/plants/all_images.gz"
CLASSES = "./data/plants/all_classes.gz"

df = pd.read_csv(IMAGES)
df_classes = pd.read_csv(CLASSES)
df_classes['class'] = df_classes['class'].swiftly.apply(lambda x: CLASSES_DICT_NAMES[x.replace("’", "")])

# Apply the shape required
df = df.values.reshape(-1, *RESHAPE_SIZE)
# Balance the dataset so each class has similar number of samples
images_generated, labels_generated = equalizer_augmentation(df, df_classes, 'class')

# Encode labels to one hot vectors
n_unique = np.unique(labels_generated).size
labels_encoded = to_categorical(labels_generated, num_classes=n_unique)

Generating 49 images for label 3
Generating 155 images for label 8
Generating 186 images for label 10
Generating 224 images for label 5
Generating 299 images for label 11
Generating 310 images for label 1
Generating 427 images for label 2
Generating 453 images for label 0
Generating 488 images for label 9
Generating 505 images for label 7
Generating 509 images for label 4


In [6]:
X_tr, X_te, Y_tr, Y_te = train_test_split(images_generated, labels_encoded, 
                                          test_size=0.1, random_state=42)

In [7]:
train_generator = ImageDataGenerator(
    featurewise_center=False,             # set input mean to 0 over the dataset
    samplewise_center=False,              # set each sample mean to 0
    featurewise_std_normalization=False,  # divide inputs by std of the dataset
    samplewise_std_normalization=False,   # divide each input by its std
    zca_whitening=False,                  # apply ZCA whitening
    rotation_range=10,                    # randomly rotate images in the range (degrees, 0 to 180)
    zoom_range = 0.1,                     # Randomly zoom image 
    width_shift_range=0.1,                # randomly shift images horizontally (fraction of total width)
    height_shift_range=0.1,               # randomly shift images vertically (fraction of total height)
    horizontal_flip=False,                # randomly flip images
    vertical_flip=False)                  # randomly flip images

train_generator.fit(X_tr)

val_generator = ImageDataGenerator(
    featurewise_center=False,             # set input mean to 0 over the dataset
    samplewise_center=False,              # set each sample mean to 0
    featurewise_std_normalization=False,  # divide inputs by std of the dataset
    samplewise_std_normalization=False,   # divide each input by its std
    zca_whitening=False,                  # apply ZCA whitening
    rotation_range=10,                    # randomly rotate images in the range (degrees, 0 to 180)
    zoom_range = 0.1,                     # Randomly zoom image 
    width_shift_range=0.1,                # randomly shift images horizontally (fraction of total width)
    height_shift_range=0.1,               # randomly shift images vertically (fraction of total height)
    horizontal_flip=False,                # randomly flip images
    vertical_flip=False)                  # randomly flip images

val_generator.fit(X_te)

## 2. Pre-Trained Model

There are several ways of creating a CNN model but in this case we will use Keras deep learning library and we will also use the available pretrained models in Keras, trained over ImageNet dataset that we will fine tune for our specific task.

It is quite inefficient to train a Convolution Neural Network from scratch except for learning purposes or some edge cases. So we take the weights of a pre trained CNN model on ImageNet with 1000 classes and fine tuning it by keeping some layers frozen and unfreezing some of them for our fine tuning training.

- The top layers learn simple basic features. We need not to train those layers as they can be directly applied to our task.
- The output differs (from 1000 classes to 12) so we add a final output layer with 12 classes.
- Then we will unfreeze some of the last layers and train them.
- We must check whether our dataset is similar to ImageNet and how big is our dataset. These 2 parameters will decide how we shoould perform the fine tuning. To know more in detail, read more from [Andrej Karpathy](https://medium.com/u/ac9d9a35533e?source=post_page-----9c1188157a86----------------------)

In our case, the dataset is small but a bit similar to ImageNet.

We will use Keras for initial benchmarks as Keras provides a number of pretrained models and we will use the ResNet50 and InceptionResNetV2 for our task. It is important to benchmark the dataset with one simple model and one very high end model to understand if we are overfitting/underfitting the dataset on the given model.

We can check available models [here](https://keras.io/applications/)

### 2.1 MobileNetV2


In [8]:
OUTPUT_CLASSES = 12
METRIC = "val_accuracy"

pretrained_MobileNetV2 = applications.MobileNetV2()

In [9]:
# Show first layers of the architecture
summary = []
pretrained_MobileNetV2.summary(print_fn=lambda x: summary.append(x))
print("\n".join(summary[:12]))

Model: "mobilenetv2_1.00_224"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
Conv1_pad (ZeroPadding2D)       (None, 225, 225, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
Conv1 (Conv2D)                  (None, 112, 112, 32) 864         Conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_Conv1 (BatchNormalization)   (None, 112, 112, 32) 128         Conv1[0][0]                      
_______________________________________________________________________________

In [10]:
# Show last layers of the architecture
summary = []
pretrained_MobileNetV2.summary(print_fn=lambda x: summary.append(x))
print("\n".join(summary[-14:]))

Conv_1 (Conv2D)                 (None, 7, 7, 1280)   409600      block_16_project_BN[0][0]        
__________________________________________________________________________________________________
Conv_1_bn (BatchNormalization)  (None, 7, 7, 1280)   5120        Conv_1[0][0]                     
__________________________________________________________________________________________________
out_relu (ReLU)                 (None, 7, 7, 1280)   0           Conv_1_bn[0][0]                  
__________________________________________________________________________________________________
global_average_pooling2d (Globa (None, 1280)         0           out_relu[0][0]                   
__________________________________________________________________________________________________
Logits (Dense)                  (None, 1000)         1281000     global_average_pooling2d[0][0]   
Total params: 3,538,984
Trainable params: 3,504,872
Non-trainable params: 34,112
____________________________

In [11]:
# Remove last layer
pretrained_MobileNetV2._layers.pop()
# Fix layers that we don't want to re-train
for layer in pretrained_MobileNetV2.layers:
    layer.trainable = False

summary = []
pretrained_MobileNetV2.summary(print_fn=lambda x: summary.append(x))
print("\n".join(summary[-12:]))

Conv_1 (Conv2D)                 (None, 7, 7, 1280)   409600      block_16_project_BN[0][0]        
__________________________________________________________________________________________________
Conv_1_bn (BatchNormalization)  (None, 7, 7, 1280)   5120        Conv_1[0][0]                     
__________________________________________________________________________________________________
out_relu (ReLU)                 (None, 7, 7, 1280)   0           Conv_1_bn[0][0]                  
__________________________________________________________________________________________________
global_average_pooling2d (Globa (None, 1280)         0           out_relu[0][0]                   
Total params: 2,257,984
Trainable params: 2,223,872
Non-trainable params: 34,112
__________________________________________________________________________________________________


In [12]:
input_ts = pretrained_MobileNetV2.input
output_ts = pretrained_MobileNetV2.layers[-1].output

x = output_ts
#x = GlobalAveragePooling2D()(x)
x = BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001)(x)
x = Dropout(0.5, noise_shape=None, seed=None)(x)
x = Dense(1280, activation='relu')(x)
x = BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001)(x)
x = Dropout(0.5, noise_shape=None, seed=None)(x)
x = Dense(512, activation='relu')(x)

predictions = Dense(OUTPUT_CLASSES, activation='softmax')(x)

In [13]:
custom_MobileNetV2 = Model(inputs=input_ts, outputs=predictions)
optimizer = Adam(lr=0.001)
custom_MobileNetV2.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

CUSTOM_MOBILENET_WEIGHTS = "./models/custom_MobileNetV2.hdf5"
lr_reducer = ReduceLROnPlateau(monitor=METRIC, factor=np.sqrt(0.1), cooldown=0, patience=5, min_lr=1e-5)
model_checkpoint= ModelCheckpoint(CUSTOM_MOBILENET_WEIGHTS, 
                                  monitor=METRIC, 
                                  save_best_only=True,
                                  save_weights_only=True, 
                                  verbose=1)
callbacks = [lr_reducer, model_checkpoint]

In [14]:
batch_size = 64
epochs = 5

In [17]:
%%time
custom_MobileNetV2.fit_generator(train_generator.flow(X_tr, Y_tr, batch_size=batch_size), 
                                 epochs=epochs, 
                                 workers=4, 
                                 callbacks=callbacks,
                                 validation_data=val_generator.flow(X_te, Y_te, batch_size=batch_size))

Epoch 1/5

Epoch 00001: val_accuracy improved from -inf to 0.08686, saving model to ./models/custom_MobileNetV2.hdf5
Epoch 2/5

Epoch 00002: val_accuracy improved from 0.08686 to 0.16258, saving model to ./models/custom_MobileNetV2.hdf5
Epoch 3/5

Epoch 00003: val_accuracy did not improve from 0.16258
Epoch 4/5

Epoch 00004: val_accuracy did not improve from 0.16258
Epoch 5/5

Epoch 00005: val_accuracy did not improve from 0.16258
CPU times: user 5h 57min 8s, sys: 5h 51min 4s, total: 11h 48min 12s
Wall time: 1h 27min 41s


<tensorflow.python.keras.callbacks.History at 0x148db1c18>

##### With the pre-trained model, we get:
- 87% accuracy on the 1<sup>st</sup> epoch vs ground up build models where it took 10 epochs to reach around 50%.
- 91% accuracy on the 5<sup>th</sup> epoch

In [30]:
custom_MobileNetV2.save_weights(CUSTOM_MOBILENET_WEIGHTS)

### 2.2 InceptionResNetV2

In [25]:
# Create custom model:

# 1) Copy all layers except the output layer
custom_InceptionResNetV2 = applications.InceptionResNetV2()
custom_InceptionResNetV2._layers.pop()

# 2) Make all layers non-trainable
for layer in custom_InceptionResNetV2.layers:
    layer.trainable = False

# # 3) Set new output layers
input_ts = pretrained_MobileNetV2.input
output_ts = pretrained_MobileNetV2.layers[-1].output
#x = GlobalAveragePooling2D()(x)
x = BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001)(output_ts)
x = Dropout(0.5, noise_shape=None, seed=None)(x)
x = Dense(1280, activation='relu')(x)
x = BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001)(x)
x = Dropout(0.5, noise_shape=None, seed=None)(x)
x = Dense(512, activation='relu')(x)
predictions = Dense(OUTPUT_CLASSES, activation='softmax')(x)

custom_InceptionResNetV2 = Model(inputs=input_ts, outputs=predictions)

summary = []
custom_InceptionResNetV2.summary(print_fn=lambda x: summary.append(x))
print("\n".join(summary[-16:]))

dropout_4 (Dropout)             (None, 1280)         0           batch_normalization_1425[0][0]   
__________________________________________________________________________________________________
dense_5 (Dense)                 (None, 1280)         1639680     dropout_4[0][0]                  
__________________________________________________________________________________________________
batch_normalization_1426 (Batch (None, 1280)         5120        dense_5[0][0]                    
__________________________________________________________________________________________________
dropout_5 (Dropout)             (None, 1280)         0           batch_normalization_1426[0][0]   
__________________________________________________________________________________________________
dense_6 (Dense)                 (None, 512)          655872      dropout_5[0][0]                  
__________________________________________________________________________________________________
dense_7 (D

In [26]:
optimizer = Adam(lr=0.001)
custom_InceptionResNetV2.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

CUSTOM_INCEPTION_WEIGHTS = "./models/custom_InceptionResNetV2.hdf5"
lr_reducer = ReduceLROnPlateau(monitor=METRIC, factor=np.sqrt(0.1), cooldown=0, patience=5, min_lr=1e-5)
model_checkpoint= ModelCheckpoint(CUSTOM_INCEPTION_WEIGHTS, 
                                  monitor=METRIC, 
                                  save_best_only=True,
                                  save_weights_only=True, 
                                  verbose=1)
callbacks = [lr_reducer, model_checkpoint]

batch_size = 64
epochs = 5

In [27]:
%%time
# Re-train the model
custom_InceptionResNetV2.fit_generator(train_generator.flow(X_tr, Y_tr, batch_size=batch_size), 
                                       epochs=epochs, 
                                       workers=4, 
                                       callbacks=callbacks,
                                       validation_data=val_generator.flow(X_te, Y_te, batch_size=batch_size))

Epoch 1/5
Epoch 00001: val_accuracy improved from -inf to 0.07238, saving model to ./models/custom_InceptionResNetV2.hdf5
Epoch 2/5
Epoch 00002: val_accuracy improved from 0.07238 to 0.09243, saving model to ./models/custom_InceptionResNetV2.hdf5
Epoch 3/5
Epoch 00003: val_accuracy improved from 0.09243 to 0.14031, saving model to ./models/custom_InceptionResNetV2.hdf5
Epoch 4/5
Epoch 00004: val_accuracy did not improve from 0.14031
Epoch 5/5
Epoch 00005: val_accuracy did not improve from 0.14031
CPU times: user 5h 56min 13s, sys: 5h 44min 14s, total: 11h 40min 27s
Wall time: 1h 25min 32s


<tensorflow.python.keras.callbacks.History at 0x1e70265c0>

In [29]:
custom_InceptionResNetV2.save_weights(CUSTOM_INCEPTION_WEIGHTS)

### Get predictions for the validation set

In [31]:
# Predict the values from the validation dataset
Y_pred = custom_InceptionResNetV2.predict(X_te)
# Convert predictions classes to one hot vectors 
Y_pred_classes = np.argmax(Y_pred, axis=1) 
# Convert validation observations to one hot vectors
Y_true = np.argmax(Y_te, axis=1) 
# compute the confusion matrix
cnf_matrix = confusion_matrix(Y_true, Y_pred_classes) 

### Confusion Matrix

In [41]:
def plot_confusion_matrix(cnf_matrix, abbreviations=None, output=None ,size=12):
    # plot the confusion matrix
    fig, ax = plt.subplots(figsize=(size, size))
    ax = sns.heatmap(cnf_matrix, annot=True, linewidths=0.01, cmap="Greens", linecolor="gray", fmt= '.1f',ax=ax)
    if abbreviations:
        ax.set_xticklabels(abbreviations)
        ax.set_yticklabels(abbreviations)
    plt.xlabel("Predicted Label")
    plt.ylabel("True Label")
    plt.title("Confusion Matrix")
    if output:
        fig.savefig(output, dpi=300)

# plot the confusion matrix
plot_confusion_matrix(cnf_matrix, abbreviations=ABBREVIATIONS, output='./fig/model2_CNN_confusion_matrix.png')

### Other metrics

In [40]:
print(f'Accuracy: \t{accuracy_score(Y_true, Y_pred_classes):.3}')
print(f'Precision: \t{[round(k, 3) for k in precision_score(Y_true, Y_pred_classes, average=None)]}')
print(f'Recall: \t{[round(k, 3) for k in recall_score(Y_true, Y_pred_classes, average=None)]}')
print(f'F1 score: \t{[round(k, 3) for k in f1_score(Y_true, Y_pred_classes, average=None)]}')

# Or all together....
print(f'\nclasification report:\n {classification_report(Y_true, Y_pred_classes)}')