# EFC1 (Q4): Convolutional Neural Network, Keras Framework, Tensorflow Backend

Initially, we start out with a basic convolutional neural network with two convolutional layers, each with a 3x3 kernel and ReLU as its activation function. On top of that sits a fully connected hidden layer with 128 neurons and 0.5 dropout. The code is as follows:

In [None]:
# -*- coding: utf-8 -*-
"""
%-----------------------------------------------------------------------------%
%Author: André Barros de Medeiros
%Date:09/16/2019
%Copyright: free to use, copy, and modify
%Description: Convolutional Network to classify MNIST dataset images
%Important: Activation Function: RELU (first layer)
%           Optimizer Algorithm: ADAM
%           Loss Function: Cross Entropy
%
%           Loss: 0.0665 / Accuracy: 0.9811
%-----------------------------------------------------------------------------%
"""

import tensorflow as tf
import os
mnist = tf.keras.datasets.mnist

# Hyperparameters of the baseline network: two convolutional layers followed
# by one fully connected hidden layer.
epoch = 5          # number of training epochs
conv1 = 32         # filters in the first convolutional layer
conv2 = 64         # filters in the second convolutional layer
kernel = [3, 3]    # kernel size shared by both convolutional layers
pool = [2, 2]      # max-pooling window
dropout1 = 0.25    # dropout rate applied after the pooling layer
neurons = 128      # units in the fully connected hidden layer
dropout2 = 0.5     # dropout rate applied after the fully connected layer

# Record of the best run, laid out exactly like the line appended to
# ConvInitial.txt:
#   [marker, epochs, conv1, conv2, kernel, pool, dropout1, neurons, dropout2,
#    loss, accuracy]
# The leading 2 is a constant marker written by every run (its meaning is not
# stated in this file). The last slot is the accuracy a run has to beat; it
# starts at 0.2. The placeholder has the same 11 slots as a real record so it
# can always be written out.
bestAccuracy = [2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.2]

print(
    "\nepoch: {}"
    "\n Convolution 1: {} with kernel: {}"
    "\n Convolution 2: {} with kernel: {} and MaxPooling pool size: {}"
    " and dropout: {}"
    "\n Fully connected layer with: {} and dropout: {}\n".format(
        epoch, conv1, kernel, conv2, kernel, pool, dropout1, neurons, dropout2
    )
)

(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Reshape to [samples][rows][cols][channels] (a single channel) and divide
# by 255 to rescale the pixel values.
x_train = x_train.reshape(x_train.shape[0], 28, 28, 1) / 255.0
x_test = x_test.reshape(x_test.shape[0], 28, 28, 1) / 255.0

# The layers are built from the hyperparameters above so that the printed
# summary and the saved record always describe the network actually trained.
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(conv1, kernel_size=tuple(kernel), activation='relu',
                           input_shape=(28, 28, 1)),
    tf.keras.layers.Conv2D(conv2, tuple(kernel), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=tuple(pool)),
    tf.keras.layers.Dropout(dropout1),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(neurons, activation='relu'),
    tf.keras.layers.Dropout(dropout2),
    tf.keras.layers.Dense(10, activation='softmax'),
])
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.fit(x_train, y_train, epochs=epoch)

# evaluation[0] is the test loss, evaluation[1] the test accuracy.
evaluation = model.evaluate(x_test, y_test)
print("\nAcurracy is :" + str(evaluation[1]) + "\n")

# Compare against the stored accuracy (last slot of the record), not against
# one of the hyperparameter slots.
if evaluation[1] > bestAccuracy[-1]:
    bestAccuracy = [2, epoch, conv1, conv2, kernel[1], pool[1], dropout1,
                    neurons, dropout2, evaluation[0], evaluation[1]]
    print("\n New Best Accuracy \n")

# Persist the architecture (JSON) and the trained weights (HDF5).
model_json = model.to_json()
with open("model_CNN.json", "w") as json_file:
    json_file.write(model_json)
model.save_weights("model_CNN.h5")
print("Model saved to disk")

# Append the record as one " ; "-separated line.
with open("ConvInitial.txt", "a+") as f:
    f.write("\n" + " ; ".join(str(value) for value in bestAccuracy))

We train the network 4 times to calculate an average loss and average accuracy, obtaining:

Average Loss: 0.031909425
Average Accuracy: 0.989825

Now we want to improve the network. To do so, we adopt a "trial and error" approach and, with a for loop, train several networks with different parameters. In the code below you will see that another convolutional layer was added and that the following parameters were experimented with: the size (number of filters) of the new convolutional layer and that layer's dropout.

In [None]:
# -*- coding: utf-8 -*-
"""
%-----------------------------------------------------------------------------%
%Author: André Barros de Medeiros
%Date:09/16/2019
%Copyright: free to use, copy, and modify
%Description: Convolutional Network to classify MNIST dataset images
%Important: Activation Function: RELU (first layer)
%           Optimizer Algorithm: ADAM
%           Loss Function: Cross Entropy
%
%           Loss: 0.0665 / Accuracy: 0.9811
%-----------------------------------------------------------------------------%
"""

import tensorflow as tf
import os
mnist = tf.keras.datasets.mnist

# Fixed hyperparameters shared by every network in the search.
epoch = 5          # number of training epochs
conv1 = 32         # filters in the first convolutional layer
conv2 = 64         # filters in the second convolutional layer
kernel = [3, 3]    # kernel size shared by all convolutional layers
pool = [2, 2]      # max-pooling window
dropout1 = 0.25    # dropout rate after the first pooling layer
neurons = 128      # units in the fully connected hidden layer
dropout3 = 0.5     # dropout rate after the fully connected layer

# Searched hyperparameters: every combination of the two lists is trained.
conv3 = [32, 64]        # candidate filter counts for the third conv layer
dropout2 = [0.3, 0.5]   # candidate dropout rates after the third conv layer

# Record of the best run, laid out exactly like the line appended to
# ConvImproved.txt:
#   [marker, epochs, conv1, conv2, kernel, pool, dropout1, conv3, dropout2,
#    neurons, dropout3, loss, accuracy]
# The leading 2 is a constant marker written by every run (its meaning is not
# stated in this file). The last slot is the accuracy a run has to beat; it
# starts at 0.2.
bestAccuracy = [2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.2]

# The data set does not depend on the configuration, so it is loaded and
# preprocessed once instead of on every iteration.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Reshape to [samples][rows][cols][channels] (a single channel) and divide
# by 255 to rescale the pixel values.
x_train = x_train.reshape(x_train.shape[0], 28, 28, 1) / 255.0
x_test = x_test.reshape(x_test.shape[0], 28, 28, 1) / 255.0

for conv3_filters in conv3:
    for conv3_dropout in dropout2:
        print(
            "\nepoch: {}"
            "\n Convolution 1: {} with kernel: {}"
            "\n Convolution 2: {} with kernel: {} and MaxPooling pool size: {}"
            " and dropout: {}"
            "\n Convolution 3: {} with kernel: {} and MaxPooling pool size: {}"
            " and dropout: {}"
            "\n Fully connected layer with: {} and dropout: {}\n".format(
                epoch, conv1, kernel, conv2, kernel, pool, dropout1,
                conv3_filters, kernel, pool, conv3_dropout, neurons, dropout3
            )
        )

        # A fresh model per configuration. The layers are built from the
        # hyperparameters so that what is printed and recorded is what is
        # actually trained (in particular, the third block's dropout uses the
        # dropout candidate of this iteration, not the filter-count index).
        model = tf.keras.models.Sequential([
            tf.keras.layers.Conv2D(conv1, kernel_size=tuple(kernel),
                                   activation='relu',
                                   input_shape=(28, 28, 1)),
            tf.keras.layers.Conv2D(conv2, tuple(kernel), activation='relu'),
            tf.keras.layers.MaxPooling2D(pool_size=tuple(pool)),
            tf.keras.layers.Dropout(dropout1),
            tf.keras.layers.Conv2D(conv3_filters, tuple(kernel),
                                   activation='relu'),
            tf.keras.layers.MaxPooling2D(pool_size=tuple(pool)),
            tf.keras.layers.Dropout(conv3_dropout),
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(neurons, activation='relu'),
            tf.keras.layers.Dropout(dropout3),
            tf.keras.layers.Dense(10, activation='softmax'),
        ])
        model.compile(optimizer='adam',
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])
        model.fit(x_train, y_train, epochs=epoch)

        # evaluation[0] is the test loss, evaluation[1] the test accuracy.
        evaluation = model.evaluate(x_test, y_test)
        print("\nAcurracy is :" + str(evaluation[1]) + "\n")

        # Compare against the stored accuracy (last slot of the record).
        # Comparing against a hyperparameter slot would freeze the record
        # after the first run, because no accuracy can exceed a kernel size.
        if evaluation[1] > bestAccuracy[-1]:
            bestAccuracy = [2, epoch, conv1, conv2, kernel[1], pool[1],
                            dropout1, conv3_filters, conv3_dropout, neurons,
                            dropout3, evaluation[0], evaluation[1]]
            print("\n New Best Accuracy \n")

        # Persist the architecture (JSON) and the trained weights (HDF5).
        # NOTE: both files are overwritten on every iteration, so after the
        # loop they hold the last network trained, not necessarily the best.
        model_json = model.to_json()
        with open("model_CNN.json", "w") as json_file:
            json_file.write(model_json)
        model.save_weights("model_CNN.h5")
        print("Model saved to disk")

# Append the best record as one " ; "-separated line.
with open("ConvImproved.txt", "a+") as f:
    f.write("\n" + " ; ".join(str(value) for value in bestAccuracy))

As seen above, the code saves the main characteristics of the network with the best accuracy, which was 0.9936 (the network with a third convolutional layer of size 32 and 0.3 dropout).

Finally, we can produce the code for our final network:

In [None]:
# -*- coding: utf-8 -*-
"""
Created on Tue Sep 17 17:18:33 2019

@author: Andre
"""

# -*- coding: utf-8 -*-
"""
%-----------------------------------------------------------------------------%
%Author: André Barros de Medeiros
%Date:09/17/2019
%Copyright: free to use, copy, and modify
%Description: Convolutional Network to classify MNIST dataset images
%Important: Activation Function: RELU 
%           Optimizer Algorithm: ADAM
%           Loss Function: Cross Entropy
%
%           Mean Loss and Accuracy: see ConvFinal.txt
%-----------------------------------------------------------------------------%
"""

import tensorflow as tf
import os
mnist = tf.keras.datasets.mnist

# Hyperparameters of the final network: three convolutional layers followed
# by one fully connected hidden layer.
epoch = 5          # number of training epochs
conv1 = 32         # filters in the first convolutional layer
conv2 = 64         # filters in the second convolutional layer
kernel = [3, 3]    # kernel size shared by all convolutional layers
pool = [2, 2]      # max-pooling window
dropout1 = 0.25    # dropout rate after the first pooling layer
conv3 = 32         # filters in the third convolutional layer
dropout2 = 0.3     # dropout rate after the second pooling layer
neurons = 128      # units in the fully connected hidden layer
dropout3 = 0.5     # dropout rate after the fully connected layer

# Record of the best run, laid out exactly like the line appended to
# ConvFinal.txt:
#   [marker, epochs, conv1, conv2, kernel, pool, dropout1, conv3, dropout2,
#    neurons, dropout3, loss, accuracy]
# The leading 2 is a constant marker written by every run (its meaning is not
# stated in this file). The last slot is the accuracy a run has to beat; it
# starts at 0.2.
bestAccuracy = [2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.2]

print(
    "\nepoch: {}"
    "\n Convolution 1: {} with kernel: {}"
    "\n Convolution 2: {} with kernel: {} and MaxPooling pool size: {}"
    " and dropout: {}"
    "\n Convolution 3: {} with kernel: {} and MaxPooling pool size: {}"
    " and dropout: {}"
    "\n Fully connected layer with: {} and dropout: {}\n".format(
        epoch, conv1, kernel, conv2, kernel, pool, dropout1,
        conv3, kernel, pool, dropout2, neurons, dropout3
    )
)

(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Reshape to [samples][rows][cols][channels] (a single channel) and divide
# by 255 to rescale the pixel values.
x_train = x_train.reshape(x_train.shape[0], 28, 28, 1) / 255.0
x_test = x_test.reshape(x_test.shape[0], 28, 28, 1) / 255.0

# The layers are built from the hyperparameters above so that the printed
# summary and the saved record always describe the network actually trained.
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(conv1, kernel_size=tuple(kernel), activation='relu',
                           input_shape=(28, 28, 1)),
    tf.keras.layers.Conv2D(conv2, tuple(kernel), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=tuple(pool)),
    tf.keras.layers.Dropout(dropout1),
    tf.keras.layers.Conv2D(conv3, tuple(kernel), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=tuple(pool)),
    tf.keras.layers.Dropout(dropout2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(neurons, activation='relu'),
    tf.keras.layers.Dropout(dropout3),
    tf.keras.layers.Dense(10, activation='softmax'),
])
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.fit(x_train, y_train, epochs=epoch)

# evaluation[0] is the test loss, evaluation[1] the test accuracy.
evaluation = model.evaluate(x_test, y_test)
print("\nAcurracy is :" + str(evaluation[1]) + "\n")

# Compare against the stored accuracy (last slot of the record), not against
# one of the hyperparameter slots.
if evaluation[1] > bestAccuracy[-1]:
    bestAccuracy = [2, epoch, conv1, conv2, kernel[1], pool[1], dropout1,
                    conv3, dropout2, neurons, dropout3,
                    evaluation[0], evaluation[1]]
    print("\n New Best Accuracy \n")

# Persist the architecture (JSON) and the trained weights (HDF5).
model_json = model.to_json()
with open("model_CNN.json", "w") as json_file:
    json_file.write(model_json)
model.save_weights("model_CNN.h5")
print("Model saved to disk")

# Append the record as one " ; "-separated line.
with open("ConvFinal.txt", "a+") as f:
    f.write("\n" + " ; ".join(str(value) for value in bestAccuracy))

As done above, we train the network 4 times and obtain:

Average Loss: 0.0244505
Average Accuracy: 0.99175

Comparing the initial network with our final version, we have an increase of 1% in accuracy, which is very substantial considering we initially had only about 2% of "room" to improve.