In [4]:
# standard library
import os
import random

# select a non-interactive backend; this has to run before pyplot is imported
import matplotlib
matplotlib.use("Agg")

# third-party
import cv2
import imutils
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from imutils import paths
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import AveragePooling2D
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Input
from tensorflow.keras.layers.experimental.preprocessing import Rescaling
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing.image import img_to_array

# import dependencies for inference
from tensorflow.keras.models import load_model

# project-local
from modules import config

In [5]:
def plot_training(H, N, plotPath):
    """Plot the loss and accuracy curves of a training run and save the figure.

    Args:
        H: object exposing a ``history`` mapping with the keys ``"loss"``,
            ``"val_loss"``, ``"accuracy"`` and ``"val_accuracy"``.
        N: number of epochs; the x axis is ``np.arange(0, N)``.
        plotPath: destination handed to ``plt.savefig``.

    Raises:
        KeyError: if one of the four expected keys is missing from
            ``H.history``.
    """
    # (history key, legend label) pairs, in drawing order
    curves = (
        ("loss", "train_loss"),
        ("val_loss", "val_loss"),
        ("accuracy", "train_acc"),
        ("val_accuracy", "val_acc"),
    )
    epochs = np.arange(0, N)

    plt.style.use("ggplot")
    fig = plt.figure()
    try:
        for key, label in curves:
            plt.plot(epochs, H.history[key], label=label)
        plt.title("Training Loss and Accuracy")
        plt.xlabel("Epoch #")
        plt.ylabel("Loss/Accuracy")
        plt.legend(loc="lower left")
        plt.savefig(plotPath)
    finally:
        # pyplot keeps every figure alive until it is closed explicitly, so
        # release it here (also when plotting fails part-way through)
        plt.close(fig)

In [None]:
print("[INFO] loading images...")
data, labels = [], []
imagePaths = sorted(paths.list_images(config.ORIG_INPUT_DATASET))
# fixed seed so the shuffle is reproducible from run to run
random.seed(42)
random.shuffle(imagePaths)

for imagePath in imagePaths:
    img = cv2.imread(imagePath)
    if img is None:
        # cv2.imread returns None instead of raising when a file cannot be decoded
        print("[WARN] skipping unreadable image: {}".format(imagePath))
        continue
    # OpenCV decodes to BGR; convert to RGB so training sees the same channel
    # order the inference cell feeds to the model
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # resize to the exact network input size so every sample has the same shape
    # (an aspect-preserving resize leaves the height image-dependent);
    # cv2.resize takes (width, height)
    img = cv2.resize(img, (config.WIDTH, config.HEIGHT))
    data.append(img_to_array(img))
    # the class name is the image's parent directory
    labels.append(imagePath.split(os.path.sep)[-2])

# encode the labels to integers (index i corresponds to le.classes_[i])
le = LabelEncoder()
labels = le.fit_transform(np.array(labels))

# scale the raw pixel intensities to the range [-1, 1] as the model expects pixel values in that range
data = np.array(data, dtype="float32") / 127.5 - 1.0

# partition the data into training, testing, validation splits
# NOTE(review): assumes config.TRAIN_SPLIT is the *training* fraction (it was
# previously passed as test_size, which would hand that fraction to the test
# set) -- confirm against modules/config
(trainX, testX, trainY, testY) = train_test_split(data, labels, train_size=config.TRAIN_SPLIT, random_state=42)
# config.VAL_SPLIT is the share of the remaining training data held out for validation
(trainX, valX, trainY, valY) = train_test_split(trainX, trainY, test_size=config.VAL_SPLIT, random_state=42)

In [None]:
# random transformations applied on the fly to the training batches
_aug_kwargs = dict(
    rotation_range=18,
    zoom_range=0.15,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.15,
    horizontal_flip=True,
    fill_mode="nearest",
)
trainAug = ImageDataGenerator(**_aug_kwargs)

# validation batches are generated with the default (no augmentation) settings
valAug = ImageDataGenerator()

In [None]:
# per-channel ImageNet mean (RGB order), attached to both generators
# NOTE(review): the generators above are built without featurewise_center, in
# which case Keras presumably never applies `.mean`; the data is also already
# scaled to [-1, 1] when it is loaded -- confirm this cell is a no-op and
# consider removing it
mean = np.array([123.68, 116.779, 103.939], dtype="float32")
for generator in (trainAug, valAug):
    generator.mean = mean

In [None]:
# construct our base model: MobileNetV2 with ImageNet weights and without its
# classification top, so a new head can be attached
print("[INFO] compiling base model...")
# Keras image tensors are laid out (height, width, channels)
base_model = MobileNetV2(input_tensor=Input(shape=(config.HEIGHT, config.WIDTH, config.DEPTH)),
                         include_top=False,
                         weights='imagenet')

In [None]:
# freeze the whole base model (the flag is set on the model itself rather than
# by looping over its layers) so its weights are *not* updated while the new
# head is trained
base_model.trainable = False

In [None]:
# construct the head of the model that will be placed on top of the base model
head_model = base_model.output
# average over the full spatial extent of the base model's feature map; the
# pool size is read from the base model instead of hard-coding (5, 5) so it
# follows the configured input size
head_model = AveragePooling2D(pool_size=base_model.output_shape[1:3])(head_model)
head_model = Flatten(name="flatten")(head_model)
head_model = Dense(256, activation="relu")(head_model)
head_model = Dropout(0.5)(head_model)
# one softmax unit per class
head_model = Dense(len(config.CLASSES), activation="softmax")(head_model)

In [None]:
# join the base model and the new head into the network that is actually trained
model = Model(inputs=base_model.input, outputs=head_model)
# summary() prints the table itself and returns None, so it is not wrapped in print()
model.summary()
base_model.summary()

In [None]:
# compile the model
print("[INFO] compiling model...")
# `learning_rate` is the supported keyword (`lr` is a deprecated alias)
# NOTE(review): `decay` is only accepted by the legacy optimizers in newer
# TensorFlow releases -- confirm against the pinned TF version
opt = Adam(learning_rate=config.INIT_LR, decay=config.INIT_LR / config.WARMUP_EPOCHS)

# the targets are integer class indices produced by LabelEncoder (not one-hot
# vectors), which is what the sparse variant of the loss expects
model.compile(loss="sparse_categorical_crossentropy", optimizer=opt, metrics=['accuracy'])

In [None]:
# train the head_model (top layers added on top of the MobileNetV2 base model)
print("[INFO] training head model...")
# the generator yields (images, labels) batches, so the targets must be trainY
H = model.fit(trainAug.flow(trainX, trainY, batch_size=config.BS),
              validation_data=valAug.flow(valX, valY, batch_size=config.BS),
              validation_steps=valX.shape[0] // config.BS,
              steps_per_epoch=trainX.shape[0] // config.BS,
              epochs=config.WARMUP_EPOCHS)


In [None]:
# use our trained model to make predictions on the held-out test data
print("[INFO] evaluating network...")
# testX is an in-memory array, so Keras derives the number of batches from
# batch_size; no explicit `steps` is needed
predIdxs = model.predict(testX, batch_size=config.BS)

In [None]:
# collapse each row of class probabilities to the index of its largest entry,
# i.e. the predicted class for that test image
predIdxs = predIdxs.argmax(axis=1)

In [None]:
# show a classification report
# the integer labels were produced by `le`, so index i means le.classes_[i];
# taking the names from the encoder keeps rows and names aligned regardless of
# how config.CLASSES is ordered, and passing `labels` keeps the report valid
# when a class is absent from the test split
print(classification_report(testY, predIdxs,
                            labels=np.arange(len(le.classes_)),
                            target_names=le.classes_))

In [None]:
# save the loss/accuracy curves of the warm-up phase
plot_training(H=H, N=config.WARMUP_EPOCHS, plotPath=config.WARMUP_PLOT_PATH)

In [None]:
print("No. of layers in the base model: ", len(base_model.layers))
# Fine-tune from this layer onwards
train_from_layer = 100

# the whole base model was frozen earlier; unfreeze the layers from
# `train_from_layer` to the top and leave the earlier ones frozen
for layer in base_model.layers[train_from_layer:]:
    # keep BatchNormalization layers frozen so they keep using their
    # pretrained statistics during fine-tuning
    layer.trainable = not isinstance(layer, tf.keras.layers.BatchNormalization)

# show which layers are trainable
for layer in base_model.layers:
    print("{}: {}".format(layer.name, layer.trainable))


In [None]:
# for the changes to the model to take effect we need to recompile the model
print("[INFO] re-compiling model...")
# `learning_rate` is the supported keyword (`lr` is a deprecated alias)
# NOTE(review): `decay` is only accepted by the legacy optimizers in newer
# TensorFlow releases -- confirm against the pinned TF version
opt = Adam(learning_rate=config.INIT_LR, decay=config.INIT_LR / config.FINETUNE_EPOCHS)
# the targets are integer class indices produced by LabelEncoder (not one-hot
# vectors), which is what the sparse variant of the loss expects
model.compile(loss="sparse_categorical_crossentropy", optimizer=opt, metrics=["accuracy"])

In [None]:
# train the model again, this time fine-tuning *both* the final set
# of CONV layers along with our set of FC layers
# the generator yields (images, labels) batches, so the targets must be trainY
H = model.fit(trainAug.flow(trainX, trainY, batch_size=config.BS),
              validation_data=valAug.flow(valX, valY, batch_size=config.BS),
              validation_steps=valX.shape[0] // config.BS,
              steps_per_epoch=trainX.shape[0] // config.BS,
              epochs=config.FINETUNE_EPOCHS)


In [None]:
# use our fine-tuned model to make predictions on the held-out test data
print("[INFO] evaluating after fine-tuning network...")
# testX is an in-memory array, so Keras derives the number of batches from
# batch_size; the former `steps=(N // BS) + 1` asked for one batch too many
# whenever N was a multiple of the batch size
predIdxs = model.predict(testX, batch_size=config.BS)

In [None]:
# index of the most probable class for each test image
predIdxs = predIdxs.argmax(axis=1)

In [None]:
# the integer labels were produced by `le`, so index i means le.classes_[i];
# taking the names from the encoder keeps rows and names aligned regardless of
# how config.CLASSES is ordered, and passing `labels` keeps the report valid
# when a class is absent from the test split
print(classification_report(testY, predIdxs,
                            labels=np.arange(len(le.classes_)),
                            target_names=le.classes_))

In [None]:
# save the loss/accuracy curves of the fine-tuning phase
plot_training(H=H, N=config.FINETUNE_EPOCHS, plotPath=config.UNFROZEN_PLOT_PATH)

In [None]:
# write the trained network to config.MODEL_PATH in HDF5 format
print("[INFO] serializing network...")
model.save(filepath=config.MODEL_PATH, save_format="h5")

### Inference

In [None]:
# load the input image and keep a resized copy to draw the prediction on
image = cv2.imread(config.IMAGE)
if image is None:
    # cv2.imread returns None instead of raising when the file cannot be read
    raise FileNotFoundError("could not read image: {}".format(config.IMAGE))
output = imutils.resize(image.copy(), width=400)

# OpenCV represents images in BGR order, so swap the channels to RGB, then
# resize to the input size the network was built with (cv2.resize takes
# (width, height))
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
image = cv2.resize(image, (config.WIDTH, config.HEIGHT))

# apply the same preprocessing as the training data: scale the pixel
# intensities to the range [-1, 1]
image = image.astype("float32") / 127.5 - 1.0

# load the trained model from disk
print("[INFO] loading model...")
model = load_model(config.MODEL_PATH)
# pass the image through the network (as a batch of one) to obtain our predictions
preds = model.predict(np.expand_dims(image, axis=0))[0]
i = np.argmax(preds)
# NOTE(review): assumes config.CLASSES is ordered like the LabelEncoder classes
# used during training (sorted class names) -- confirm
label = config.CLASSES[i]

# draw the prediction on the output image
text = "{}: {:.2f}%".format(label, preds[i] * 100)
cv2.putText(output, text, (3, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

# show the output image until a key is pressed, then release the window
cv2.imshow("Output", output)
cv2.waitKey(0)
cv2.destroyAllWindows()