# EFC1 (Q3): Multi-Layer Perceptron, Keras Framework, Tensorflow Backend

We began with a basic MLP network with one hidden layer of 512 neurons and 0.5 dropout. The code is as follows:

In [None]:
# -*- coding: utf-8 -*-

"""

for mean loss and accuracy, see initial.txt

"""

import tensorflow as tf
import os

# Baseline experiment: train one MLP with a single hidden layer
# (512 ReLU units, dropout 0.5) for 5 epochs, evaluate it on the MNIST test
# split, save the model, and append the result to initial.txt.

print("\nepoch: 5 ; neurons: 512 ; dropout: 0.5 \n")

mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Divide the image arrays by 255.0 before feeding them to the network.
x_train, x_test = x_train / 255.0, x_test / 255.0

model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512, activation=tf.nn.relu),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(10, activation=tf.nn.softmax),
])
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.fit(x_train, y_train, epochs=5)

# evaluation[0] is the loss and evaluation[1] the accuracy on the test split.
evaluation = model.evaluate(x_test, y_test)
model_json = model.to_json()

# Record layout: [hidden layers, epochs, neurons, dropout, loss, accuracy].
bestAccuracy = [1, 5, 512, 0.5, evaluation[0], evaluation[1]]

# Context managers guarantee the files are closed even if a write fails.
with open("model_MLP.json", "w") as json_file:
    json_file.write(model_json)
model.save_weights("model_MLP.h5")
print("Model saved to disk")

# Opened in append mode so repeated runs accumulate one line each; the
# averages are computed from those lines.
with open("initial.txt", "a+") as f:
    f.write(" ; ".join(str(value) for value in bestAccuracy) + "\n")

We trained the network 4 times to calculate the average loss and the average accuracy. We obtained:

Average Loss: 0.067664
Average Accuracy: 0.979025

Now, we alter the original MLP to increase accuracy. To do so, we adopt a "trial and error" approach, varying the key parameters of the network with nested for loops. We experimented with the number of hidden layers, the number of epochs, the number of neurons, and the dropout rate. So as not to occupy all the RAM at once and to avoid using swap memory, a separate script was created for each case: one, two, and three hidden layers:

In [None]:
# -*- coding: utf-8 -*-
""""
%-----------------------------------------------------------------------------%
%Author: André Barros de Medeiros
%Date:09/14/2019
%Copyright: free to use, copy, and modify
%Description: Multi-Layer Perceptron to classify MNIST dataset images
%Important: Layers: 2 (600 neurons, 10 neurons)
%           Epochs: 6
%           Dropout: 0.4 (first layer)
%           Activation Function: RELU (first layer)
%           Optimizer Algorithm: ADAM
%           Loss Function: Cross Entropy
%
%           Accuracy: 0.9819, gain of 0.008
%-----------------------------------------------------------------------------%
"""

import tensorflow as tf
import os
# Grid search over epochs, hidden-layer width and dropout for an MLP with one
# hidden layer. The best configuration (by test accuracy) is written to
# one_hidden_layer.txt, and the corresponding model is saved to disk.

mnist = tf.keras.datasets.mnist

# The data does not depend on the grid point, so load and scale it once
# instead of once per trained model.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

# Record layout: [hidden layers, epochs, neurons, dropout, accuracy].
# The 0.2 in the last slot is the accuracy a run must beat to be recorded.
bestAccuracy = [1, 0, 0, 0, 0.2]

for epoch in [7, 8]:
    for neurons in [300, 512]:
        for dropout in [0.2, 0.3, 0.4, 0.5]:
            print("\nepoch: " + str(epoch)+"; neurons: "+str(neurons)+"; dropout: "+str(dropout) + "\n")

            # Release the Keras global state left by the previous model so
            # memory use does not grow with every grid point.
            tf.keras.backend.clear_session()

            model = tf.keras.models.Sequential([
                tf.keras.layers.Flatten(),
                tf.keras.layers.Dense(neurons, activation=tf.nn.relu),
                tf.keras.layers.Dropout(dropout),
                tf.keras.layers.Dense(10, activation=tf.nn.softmax),
            ])

            model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
            model.fit(x_train, y_train, epochs=epoch)
            evaluation = model.evaluate(x_test, y_test)  # [loss, accuracy]

            print( "\nAcurracy with: " + str(epoch) + " , " + str(neurons) + " , " + str(dropout) + " is " + str(evaluation[1])+"\n")
            if evaluation[1] > bestAccuracy[4]:
                bestAccuracy = [1, epoch, neurons, dropout, evaluation[1]]
                print("\n New Best Accuracy \n")

                # Save only when a new best is found; saving on every
                # iteration left the files holding the last model trained
                # rather than the best one.
                model_json = model.to_json()
                with open("model_MLP.json", "w") as json_file:
                    json_file.write(model_json)
                model.save_weights("model_MLP.h5")
                print("Model saved to disk")

# "w+" truncates the file, so it always holds the result of the latest search.
with open("one_hidden_layer.txt", "w+") as f:
    f.write(" ; ".join(str(value) for value in bestAccuracy))


In [None]:
# -*- coding: utf-8 -*-
""""
%-----------------------------------------------------------------------------%
%Author: André Barros de Medeiros
%Date:09/14/2019
%Copyright: free to use, copy, and modify
%Description: Multi-Layer Perceptron to classify MNIST dataset images
%Important: Layers: 3 (512 neurons, 512, 10 neurons)
%           Epochs: 5
%           Dropout: 0.5 (first layer)
%           Activation Function: RELU (first layer)
%           Optimizer Algorithm: ADAM
%           Loss Function: Cross Entropy
%
%           Loss: 0.0665 / Accuracy: 0.9811
%-----------------------------------------------------------------------------%
"""

import tensorflow as tf
import os
# Grid search for an MLP with two hidden layers. The first hidden layer is
# fixed (512 ReLU units, dropout 0.5); epochs and the width and dropout of the
# second hidden layer are varied. The best configuration (by test accuracy) is
# written to two_hidden_layers.txt, and the corresponding model is saved.

mnist = tf.keras.datasets.mnist

# The data does not depend on the grid point, so load and scale it once
# instead of once per trained model.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

# Record layout: [hidden layers, epochs, neurons, dropout, accuracy].
# The 0.2 in the last slot is the accuracy a run must beat to be recorded.
bestAccuracy = [2, 0, 0, 0, 0.2]

for epoch in [7, 8]:
    for neurons in [300, 512]:
        for dropout in [0.2, 0.3, 0.4]:
            print("\nepoch: " + str(epoch)+"; neurons: "+str(neurons)+"; dropout: "+str(dropout) + "\n")

            # Release the Keras global state left by the previous model so
            # memory use does not grow with every grid point.
            tf.keras.backend.clear_session()

            model = tf.keras.models.Sequential([
                tf.keras.layers.Flatten(),
                tf.keras.layers.Dense(512, activation=tf.nn.relu),
                tf.keras.layers.Dropout(0.5),
                tf.keras.layers.Dense(neurons, activation=tf.nn.relu),
                tf.keras.layers.Dropout(dropout),
                tf.keras.layers.Dense(10, activation=tf.nn.softmax),
            ])

            model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
            model.fit(x_train, y_train, epochs=epoch)
            evaluation = model.evaluate(x_test, y_test)  # [loss, accuracy]

            print( "\nAcurracy with: " + str(epoch) + " , " + str(neurons) + " , " + str(dropout) + " is " + str(evaluation[1])+"\n")
            if evaluation[1] > bestAccuracy[4]:
                bestAccuracy = [2, epoch, neurons, dropout, evaluation[1]]
                print("\n New Best Accuracy \n")

                # Save only when a new best is found; saving on every
                # iteration left the files holding the last model trained
                # rather than the best one.
                model_json = model.to_json()
                with open("model_MLP.json", "w") as json_file:
                    json_file.write(model_json)
                model.save_weights("model_MLP.h5")
                print("Model saved to disk")

# "w+" truncates the file, so it always holds the result of the latest search.
with open("two_hidden_layers.txt", "w+") as f:
    f.write(" ; ".join(str(value) for value in bestAccuracy))


In [None]:
# -*- coding: utf-8 -*-
""""
%-----------------------------------------------------------------------------%
%Author: André Barros de Medeiros
%Date:09/14/2019
%Copyright: free to use, copy, and modify
%Description: Multi-Layer Perceptron to classify MNIST dataset images
%Important: Layers: 4 (512 neurons, 512, 10 neurons)
%           Epochs: 5
%           Dropout: 0.5 (first layer)
%           Activation Function: RELU (first layer)
%           Optimizer Algorithm: ADAM
%           Loss Function: Cross Entropy
%
%           Loss: 0.0665 / Accuracy: 0.9811
%-----------------------------------------------------------------------------%
"""

import tensorflow as tf
import os
# Grid search for an MLP with three hidden layers. The first hidden layer is
# fixed (512 ReLU units, dropout 0.5); epochs and the shared width and dropout
# of the second and third hidden layers are varied. The best configuration
# (by test accuracy) is written to three_hidden_layers.txt, and the
# corresponding model is saved.

mnist = tf.keras.datasets.mnist

# The data does not depend on the grid point, so load and scale it once
# instead of once per trained model.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

# Record layout: [hidden layers, epochs, neurons, dropout, accuracy].
# The 0.2 in the last slot is the accuracy a run must beat to be recorded.
bestAccuracy = [3, 0, 0, 0, 0.2]

for epoch in [7, 8]:
    for neurons in [300, 512]:
        for dropout in [0.2, 0.3, 0.4, 0.5]:
            print("\nepoch: " + str(epoch)+"; neurons: "+str(neurons)+"; dropout: "+str(dropout) + "\n")

            # Release the Keras global state left by the previous model so
            # memory use does not grow with every grid point.
            tf.keras.backend.clear_session()

            model = tf.keras.models.Sequential([
                tf.keras.layers.Flatten(),
                tf.keras.layers.Dense(512, activation=tf.nn.relu),
                tf.keras.layers.Dropout(0.5),
                tf.keras.layers.Dense(neurons, activation=tf.nn.relu),
                tf.keras.layers.Dropout(dropout),
                tf.keras.layers.Dense(neurons, activation=tf.nn.relu),
                tf.keras.layers.Dropout(dropout),
                tf.keras.layers.Dense(10, activation=tf.nn.softmax),
            ])

            model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
            model.fit(x_train, y_train, epochs=epoch)
            evaluation = model.evaluate(x_test, y_test)  # [loss, accuracy]

            print( "\nAcurracy with: " + str(epoch) + " , " + str(neurons) + " , " + str(dropout) + " is " + str(evaluation[1])+"\n")
            if evaluation[1] > bestAccuracy[4]:
                # The first slot is the number of hidden layers: 3 here. The
                # original wrote 2, mislabelling the result file.
                bestAccuracy = [3, epoch, neurons, dropout, evaluation[1]]
                print("\n New Best Accuracy \n")

                # Save only when a new best is found; saving on every
                # iteration left the files holding the last model trained
                # rather than the best one.
                model_json = model.to_json()
                with open("model_MLP.json", "w") as json_file:
                    json_file.write(model_json)
                model.save_weights("model_MLP.h5")
                print("Model saved to disk")

# "w+" truncates the file, so it always holds the result of the latest search.
with open("three_hidden_layers.txt", "w+") as f:
    f.write(" ; ".join(str(value) for value in bestAccuracy))


You will notice that in each script we save the characteristics of the network with the best accuracy. We then compare the three results and find that the best accuracy was 0.9836 (network with 1 hidden layer, 8 epochs of training, 300 neurons in the hidden layer, and 0.2 dropout).

We now have our final network:

In [None]:
# -*- coding: utf-8 -*-
""""
%-----------------------------------------------------------------------------%
%Author: André Barros de Medeiros
%Date:09/23/2019
%Copyright: free to use, copy, and modify
%Description: Final Multi-Layer Perceptron to classify MNIST dataset images
%Important: Hidden Layers: 1 (300 neurons)
%           Epochs: 8
%           Dropout: 0.2 (first layer)
%           Activation Function: RELU (first layer)
%           Optimizer Algorithm: ADAM
%           Loss Function: Cross Entropy
%
% Averages (4 tests):
%           Loss: 0.066126  / Accuracy: 0.982075
%-----------------------------------------------------------------------------%
"""

import tensorflow as tf
import os
# Final network: one hidden layer (300 ReLU units, dropout 0.2) trained for
# 8 epochs. The model is saved to disk and the evaluation result is appended
# to finalMLP.txt.

mnist = tf.keras.datasets.mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Divide the image arrays by 255.0 before feeding them to the network.
x_train, x_test = x_train / 255.0, x_test / 255.0

model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(300, activation=tf.nn.relu),    # hidden layer
    tf.keras.layers.Dropout(0.2),                         # hidden layer dropout
    tf.keras.layers.Dense(10, activation=tf.nn.softmax),  # output layer
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(x_train, y_train, epochs=8)
evaluation = model.evaluate(x_test, y_test)  # [loss, accuracy]

print( "\nAcurracy with: 8 epochs, 300 neurons, and 0.2 dropout is " + str(evaluation[1])+"\n")

# Record layout:
# [hidden layers, epochs, neurons in hidden layer, hidden layer dropout, loss, accuracy]
# Built unconditionally: the previous guard compared the accuracy against a
# 5-slot placeholder and, had it ever been false, the 6-slot write below
# would have raised IndexError.
bestAccuracy = [1, 8, 300, 0.2, evaluation[0], evaluation[1]]

model_json = model.to_json()
# Context managers guarantee the files are closed even if a write fails.
with open("model_MLP.json", "w") as json_file:
    json_file.write(model_json)

model.save_weights("model_MLP.h5")  # save weights for later use
print("Model saved to disk")

# Opened in append mode so repeated runs accumulate one line each; the
# averages are computed from those lines.
with open("finalMLP.txt", "a+") as f:
    f.write(" ; ".join(str(value) for value in bestAccuracy) + "\n")


As done before, we train the network 4 times and obtain:

Average Loss: 0.066126
Average Accuracy: 0.982075

This means that we have an increase of about 0.3 percentage points in accuracy (from 0.979025 to 0.982075), which is substantial considering that we are close to 100% and don't have much room to improve.