# Creation, training and evaluation of a CNN using TensorFlow backend Keras

In [None]:
# All modules needed for the code
from tensorflow.python.keras.models import load_model, save_model
from tensorflow.python.keras.preprocessing.image import img_to_array, ImageDataGenerator
from tensorflow.python.keras import backend as bkend
from imutils import paths
from sklearn.preprocessing import LabelBinarizer
import cv2
import os
import argparse

import numpy as np
import random
import tensorflow as tf
import matplotlib
import matplotlib.pyplot as plt

from keras.models import Model, Input
from keras import layers
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Flatten
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Average
from keras import optimizers
from keras import regularizers
from keras import losses


### Loading in images
The below code is taken and slightly adapted from the given code within test.py template in the project handout. It reads in images, transfers them to arrays and sets them up to be run through the CNN later in the code. First the seed is set, then the methods are set up before being called.

In [None]:
# Set random seeds so that results are reproducible from run to run.
SEED = 309
np.random.seed(SEED)
random.seed(SEED)
# TensorFlow 2.x exposes the global seed as tf.random.set_seed, while
# TensorFlow 1.x only has tf.set_random_seed; use whichever is available.
if hasattr(tf, "random") and hasattr(tf.random, "set_seed"):
    tf.random.set_seed(SEED)
else:
    tf.set_random_seed(SEED)

In [None]:
def load_images(test_data_dir, image_size, training):
    """
    Load every image found under a directory together with its class label.

    Each image is read with OpenCV, resized to ``image_size`` and converted
    to an array with ``img_to_array``. The label of an image is the name of
    the directory that directly contains it.

    :param test_data_dir: root directory that is searched for image files
    :param image_size: target size tuple handed to ``cv2.resize``
    :param training: not used by this function; kept so that existing
        callers keep working
    :return: ``(images_data, labels)`` -- two lists of equal length in which
        ``labels[i]`` is the label of ``images_data[i]``
    """
    images_data = []
    labels = []
    # Sorting the paths makes the loading order deterministic.
    for image_path in sorted(paths.list_images(test_data_dir)):
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread signals an unreadable file by returning None rather
            # than raising; skip it instead of crashing inside cv2.resize.
            print("Skipping unreadable image: {}".format(image_path))
            continue
        img = cv2.resize(img, image_size)
        images_data.append(img_to_array(img))
        # The class label is the name of the image's parent directory.
        labels.append(image_path.split(os.path.sep)[-2])
    # Labels are returned in loading order. Sorting them separately from the
    # images could pair an image with another image's label.
    return images_data, labels


Below is the code that was used to create extra images from the given dataset, enriching the data and adding more variety. It has been commented out because it was run only once: the generated images were written to a folder and then moved into the correct training and test data folders, so that they were included when training and testing the CNN. It is included here to show how the images were generated.

In [None]:
 #
 #   rand = random.uniform(0.0,1.0)
 #       #40% chance for an image to be changed or enhanced, options include blurring and rotations. Basic data preprocessing to artificially increase the data size
 #       if  rand < 0.2:
 #           if rand < 0.1:
 #               M = cv2.getRotationMatrix2D((100/2, 100/2), 90, 1)
 #               dst = cv2.warpAffine(img, M, image_size)
 #               images_data.append(dst)
 #               labels.append(label)
            #Rotate 180 degrees
 #           rand = random.uniform(0.0,1.0)
 #           if rand < 0.1:
 #               M = cv2.getRotationMatrix2D((100/2, 100/2), 180, 1)
 #               dst = cv2.warpAffine(img, M, image_size)
 #               images_data.append(dst)
 #               labels.append(label)
   
            #Blur image
 #           rand = random.uniform(0.0,1.0)
 #           if rand < 0.1:
 #               blur = cv2.blur(img,(5,5))
 #               images_data.append(blur)
 #               labels.append(label)
        


In [None]:
def convert_img_to_array(images, labels):
    """
    Turn loaded images and their labels into numpy arrays for the model.

    :param images: sequence of image arrays
    :param labels: sequence of class labels, one per image
    :return: ``(pixels, encoded_labels)`` -- the images as a float array
        divided by 255.0, and the labels encoded by a ``LabelBinarizer``
        fitted on ``labels``
    """
    # Constant normalisation: every pixel value is divided by 255.
    pixels = np.array(images, dtype="float") / 255.0

    # Binarise the labels with an encoder fitted on these labels only.
    encoder = LabelBinarizer()
    encoded_labels = encoder.fit_transform(np.array(labels))

    return pixels, encoded_labels

The code below, taken from test.py, was chosen over the ImageDataGenerator option because it trained much faster, thanks to its ability to run on the GPU of the computer the CNN was trained on. The ImageDataGenerator code is included to show what could have been used.

In [None]:
# Where the images live, and the size every image is resized to on load.
train_data_dir = 'train_data'
test_data_dir = 'test_data'
image_size = (100, 100)

# Read the images and their directory-derived labels from disk.
train_images, train_labels = load_images(train_data_dir, image_size, True)
test_images, test_labels = load_images(test_data_dir, image_size, False)

# Convert to numpy arrays (pixels divided by 255.0) and binarize the
# categorical labels.
training_data, training_labels = convert_img_to_array(train_images, train_labels)
test_data, testing_labels = convert_img_to_array(test_images, test_labels)

# Independent copies of the arrays, kept under separate names.
training_data_original, test_data_original = training_data.copy(), test_data.copy()
training_data_labels, testing_data_labels = training_labels.copy(), testing_labels.copy()


In [None]:
#train_datagen = ImageDataGenerator(rescale = 1./255, 
#                                  horizontal_flip = True,
#                                  vertical_flip = True,
#                                  zoom_range=0.2,
#                                  rotation_range = 90)

#test_datagen = ImageDataGenerator(rescale=1./255)

#traind = train_datagen.flow(training_data, training_labels, batch_size=32)
#testd = test_datagen.flow(test_data, test_labels, batch_size=32)

### Build the model
Below is the model created from optimal combinations of loss functions, regularisation, number of layers etc as detailed in the report. This was the model which was saved to be used in test.py, and was one of the two used in the creation of an ensemble model later on.

In [None]:
# First CNN: three convolution/pooling stages followed by a small dense head.
classifier = Sequential([
    # Stage 1: 32 filters of 3x3 on the 100x100x3 input. 'valid' padding
    # means no padding is added. A 4x4 max pool then shrinks the feature maps.
    Conv2D(32, (3, 3), input_shape=(100, 100, 3), padding='valid', activation='relu'),
    MaxPooling2D(pool_size=(4, 4)),
    # Stage 2: 64 filters of 3x3, followed by 3x3 max pooling.
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(3, 3)),
    # Drop half of the activations to reduce the risk of overfitting.
    Dropout(rate=0.5),
    # Stage 3: 32 filters of 2x2, followed by 2x2 max pooling.
    Conv2D(32, (2, 2), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    # Flatten the feature maps so they can feed the dense layers.
    Flatten(),
    # Fully connected layer of 128 units, followed by a 20% dropout layer.
    Dense(units=128, activation='relu'),
    Dropout(rate=0.2),
    # Output layer: softmax over the 3 classes.
    Dense(units=3, activation='softmax'),
])

# Best settings found for the Adam optimiser. Adam was used for its quick and
# easy training; per the author it is good overall but not as good as SGD.
adam = optimizers.Adam(lr=0.001)
# This model uses a different optimiser and loss from the second model so
# that the ensemble built later is more varied.
classifier.compile(
    loss='categorical_crossentropy',
    optimizer=adam,
    metrics=['categorical_accuracy'],
)
classifier.summary()

### Train the CNN
This is where the CNN is trained on the arrays created earlier in the code. Timing code is included (commented out) to measure how long training takes. 100 epochs proved to be enough to train the CNN to a high accuracy; the choice of validation split is discussed in the report, and a batch size of 50 gave high accuracy without exceeding hardware limits and causing out-of-memory errors.

In [None]:
import time
# Keras takes the validation split from the END of the arrays, before any
# shuffling. The images were loaded in sorted path order, so the tail of the
# training arrays is not a representative sample of the classes. Shuffle the
# images and labels together with one shared permutation first.
shuffle_order = np.random.permutation(len(training_data))
# Timing is disabled; uncomment the three timing lines to measure training.
#start = time.time()
classifier.fit(
    training_data[shuffle_order],
    training_labels[shuffle_order],
    batch_size=50,
    validation_split=0.1,
    verbose=1,
    epochs=100,
)
#end = time.time()
#print(end - start)


### Evaluation of the CNN
This is where the CNN is evaluated, first on the training data and then on the test data, which is entirely unseen data. The CNN is then saved to the models folder to be used later in test.py (the save call is currently commented out).

In [None]:
# Score the model on the data it was trained on. evaluate returns the loss
# followed by the compiled metric (categorical accuracy).
scores = classifier.evaluate(training_data, training_labels)
print("Train loss:{} \n Train accuracy:{}".format(scores[0], 100 * scores[1]))

# Score the model on the held-out test data in the same way.
scores = classifier.evaluate(test_data, testing_labels)
print("Test loss:{} \n Test accuracy:{}".format(scores[0], 100 * scores[1]))

# Saving is disabled here; uncomment to write the model to disk.
#classifier.save('models/model.h5')

### Late EDA
In this portion of the code three random images are selected from the dataset and displayed, showing their prediction probabilities for each class in order to see how the CNN is performing on a few limited options to give an idea of noise in image impacting overall accuracy (often seen where too much green in a cherry picture causes an assumption that the image is a tomato)
The images are displayed with a blue tint because OpenCV reads them in BGR channel order, whereas matplotlib displays arrays as RGB.

In [None]:
# Pick one test image at random and display it.
rand_index = np.random.randint(len(test_data))
rand_image = test_data_original[rand_index]
plt.imshow(rand_image)

# predict expects a batch, so a one-element slice is passed. The printed
# values are the softmax outputs for each of the three classes.
print("Y prediction:{}".format(classifier.predict(test_data[rand_index:rand_index + 1])))

In [None]:
# Second random sample: choose another test image and display it.
rand_index = np.random.randint(len(test_data))
rand_image = test_data_original[rand_index]
plt.imshow(rand_image)

# A one-element slice keeps the batch dimension that predict expects.
print("Y prediction:{}".format(classifier.predict(test_data[rand_index:rand_index + 1])))

In [None]:
# Third random sample: choose another test image and display it.
rand_index = np.random.randint(len(test_data))
rand_image = test_data_original[rand_index]
plt.imshow(rand_image)

# A one-element slice keeps the batch dimension that predict expects.
print("Y prediction:{}".format(classifier.predict(test_data[rand_index:rand_index + 1])))

### Second CNN Model
Below is the code written to create a second CNN model, train it then run this model with the first model created to create an ensemble model. The ensemble model will make predictions on the test data but does not go any further. Given more time the data would ideally be compared to the actual labelled class to give a full comparison of accuracy and loss. This was included in the train.py notebook to show the attempt made and that some steps worked. 

In [None]:
# Second CNN: same layout as the first model, except that the last
# convolution has 64 filters. It is trained with SGD and an MSE loss.
classifier2 = Sequential([
    # Stage 1: 32 filters of 3x3 on the 100x100x3 input. 'valid' padding
    # means no padding is added. A 4x4 max pool then shrinks the feature maps.
    Conv2D(32, (3, 3), input_shape=(100, 100, 3), padding='valid', activation='relu'),
    MaxPooling2D(pool_size=(4, 4)),
    # Stage 2: 64 filters of 3x3, followed by 3x3 max pooling.
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(3, 3)),
    # Drop half of the activations to reduce the risk of overfitting.
    Dropout(rate=0.5),
    # Stage 3: 64 filters of 2x2, followed by 2x2 max pooling.
    Conv2D(64, (2, 2), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    # Flatten the feature maps so they can feed the dense layers.
    Flatten(),
    # Fully connected layer of 128 units, followed by a 20% dropout layer.
    Dense(units=128, activation='relu'),
    Dropout(rate=0.2),
    # Output layer: softmax over the 3 classes.
    Dense(units=3, activation='softmax'),
])

# Best settings found for the SGD optimiser.
sgd = optimizers.SGD(lr=0.01, momentum=0.5)
# This model uses a different optimiser and loss from the first model so
# that the ensemble built later is more varied.
classifier2.compile(
    loss='mse',
    optimizer=sgd,
    metrics=['categorical_accuracy'],
)
classifier2.summary()

Train the classifier with training data and labels created as arrays, using a batch size of 32 due to restricted tech and running for 100 epochs to give sgd best rate to train at

In [None]:
# Keras takes the validation split from the END of the arrays, before any
# shuffling. The images were loaded in sorted path order, so the tail of the
# training arrays is not a representative sample of the classes. Shuffle the
# images and labels together with one shared permutation first.
shuffle_order2 = np.random.permutation(len(training_data))
classifier2.fit(
    training_data[shuffle_order2],
    training_labels[shuffle_order2],
    batch_size=32,
    validation_split=0.1,
    verbose=1,
    epochs=100,
)

Model evaluation, same as above

In [None]:
# evaluate the model
scores = classifier2.evaluate(training_data, training_labels)
print("Train loss:{} \n Train accuracy:{}".format(scores[0], scores[1]*100))
#print("\n%s: %.2f%%" % (classifier.metrics_names[1], scores[1]*100))

scores = classifier2.evaluate(test_data, testing_labels)
print("Test loss:{} \n Test accuracy:{}".format(scores[0], scores[1]*100))
#print("\n%s: %.2f%%" % (classifier.metrics_names[1], scores[1]*100))

classifier2.save('models/sgdmodel.h5')

### Ensemble Model
The code below creates an ensemble model and was heavily inspired by: https://medium.com/randomai/ensemble-and-store-models-in-keras-2-x-b881a6d7693f The ensemble model does not fully work yet: it makes predictions correctly, but an overall accuracy/loss is not yet computed.

In [None]:
# The two CNNs defined above; their outputs are averaged by the ensemble.
models = [classifier, classifier2]

def ensembleModels(models, model_input):
    """
    Build a model that averages the outputs of several models.

    :param models: models that are each applied to ``model_input``
    :param model_input: input tensor shared by all of the models
    :return: a ``Model`` named 'ensemble' that maps ``model_input`` to the
        average of the individual model outputs
    """
    # Apply every member model to the same shared input.
    member_outputs = []
    for member in models:
        member_outputs.append(member(model_input))

    # Combine the member outputs by averaging them.
    averaged = layers.average(member_outputs)

    return Model(inputs=model_input, outputs=averaged, name='ensemble')

# Per-sample input shape of the first model; the [1:] slice drops the leading
# batch dimension.
sample_shape = models[0].input_shape[1:]
model_input = Input(shape=sample_shape)

# Wire both models to the shared input and average their outputs.
modelEns = ensembleModels(models, model_input)
modelEns.summary()

# Averaged predictions for the test images.
predictions = modelEns.predict(test_data)
print(predictions)