# COMP5318 - Machine Learning and Data Mining: Assignment 2 - Best Algorithm

## CONTENTS

-  [0. Set up](#0)
-  [1. Obtain data](#1)
    -  [1.1. Data info](#1.1)
    -  [1.2. Load data](#1.2)
    -  [1.3. Set train/test data](#1.3)
-  [2. Pre-process data](#2)
    -  [2.1. Standardized data](#2.1)
-  [3. Best Algorithms - CNN](#3)
-  [4. Computer details](#4)
-  [5. Easy to use](#5)

## 0. Set up <a id='0'></a>

In [None]:
import sklearn
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
import pandas as pd
import os
import time
import cv2

%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt

## 1. Obtain data <a id='1'></a>

### 1.1. Data info <a id='1.1'></a>

The input data is the "Fnt" folder, which contains 62 sub-folders (one per character class), each holding 1016 PNG images (downloaded from http://www.ee.surrey.ac.uk/CVSSP/demos/chars74k/):

    EnglishFnt.tgz (51.1 MB): characters from computer fonts with 4 variations (combinations of italic, bold and normal).

### 1.2. Load data <a id='1.2'></a>

In [None]:
# read image and resize image
def img_resizing(i_path):
    """Load an image as grayscale and resize it to 28x28 pixels.

    Parameters
    ----------
    i_path : str
        Path of the image file to read.

    Returns
    -------
    numpy.ndarray
        The resized grayscale image, a 28x28 array.

    Raises
    ------
    ValueError
        If OpenCV cannot read or decode the file.
    """
    img = cv2.imread(i_path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread reports failure by returning None instead of raising, which
    # would otherwise only surface as an opaque error inside cv2.resize.
    if img is None:
        raise ValueError("could not read image: %s" % i_path)
    data = cv2.resize(img, (28, 28), interpolation=cv2.INTER_CUBIC)
    return data

# converting the images into sets of data
data = []
label = []

for i in range(62):
    # Folders are named Sample001 ... Sample062; the zero-based folder index
    # is used as the class label.
    path = 'English/Fnt/Sample%03d/' % (i+1)
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order, and therefore the seeded train/test split, reproducible.
    for filename in sorted(os.listdir(path)):
        try:
            img = img_resizing(path+filename)
        except Exception as exc:
            # Best effort: report the unreadable file and keep going.
            # Catching Exception (not a bare except) lets Ctrl-C through.
            error_message = path + filename
            print("failed: ", error_message, "-", exc)
            continue
        # Flatten each image into a single row vector of pixel values.
        tmp = img.reshape([1, img.shape[0]*img.shape[1]])
        data.append(np.asarray(tmp, dtype = "int32"))
        label.append(i)

# convert data type
data_array = np.asarray(data)
# Collapse the per-image (1, n_pixels) rows into one 2-D sample matrix.
new_data = data_array.reshape(data_array.shape[0],-1)
print("The data shape is: ", new_data.shape)
new_label = np.asarray(label)
print("The label shape is: ", new_label.shape)

### 1.3. Set train / test data <a id='1.3'></a>

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; the fixed random_state seeds the shuffle.
X, y = new_data, new_label
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=500,
)

# report the shapes of the resulting train and test partitions
print("X_train shape:", X_train.shape, "y_train shape:", y_train.shape)
print("X_test shape:", X_test.shape, "y_test shape:", y_test.shape)

## 2. Pre-process data <a id='2'></a>

### 2.1. Standardized data <a id='2.1'></a>

In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the per-feature mean and standard deviation from the training set
# only, then apply that same transform to both sets so that no statistics
# from the test set influence the scaling.
scaler = StandardScaler()
X_train_std = scaler.fit_transform(X_train)
X_test_std = scaler.transform(X_test)

## 3. Best Algorithms - CNN <a id='3'></a>

In [5]:
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [6]:
# Restore the 28x28 image layout and append a single trailing channel axis.
X_train_keras = X_train_std.reshape((len(X_train_std), 28, 28, 1))
X_test_keras = X_test_std.reshape((len(X_test_std), 28, 28, 1))

# One-hot encode the integer class labels (62 classes).
y_train_keras, y_test_keras = (
    keras.utils.to_categorical(labels, 62).astype('int32')
    for labels in (y_train, y_test)
)

# print shape of data and label
for caption, array in (
    ("The shape of train data: ", X_train_keras),
    ("The shape of test data: ", X_test_keras),
    ("The shape of train label: ", y_train_keras),
    ("The shape of test label: ", y_test_keras),
):
    print(caption, array.shape)

The shape of train data:  (50393, 28, 28, 1)
The shape of test data:  (12599, 28, 28, 1)
The shape of train label:  (50393, 62)
The shape of test label:  (12599, 62)


In [13]:
# Reset Keras' global state, then seed both NumPy and TensorFlow with the
# same value so that runs are repeatable.
keras.backend.clear_session()
for set_seed in (np.random.seed, tf.random.set_seed):
    set_seed(42)

In [14]:
# Model 
def cnn_model(input_shape=(28, 28, 1), num_classes=62, seed=42):
    """Build the (uncompiled) CNN classifier.

    The network stacks four convolution + batch-normalisation stages with
    three max-pooling steps, followed by two fully connected layers
    (256 and 128 units) with dropout and a softmax output layer.

    Calling this function also clears the current Keras session and re-seeds
    NumPy and TensorFlow, so every model it returns starts from the same
    random state.

    Parameters
    ----------
    input_shape : tuple of int, optional
        Shape of one input sample as (height, width, channels).
    num_classes : int, optional
        Number of output classes, i.e. units in the softmax layer.
    seed : int, optional
        Seed passed to both NumPy and TensorFlow.

    Returns
    -------
    tensorflow.keras.models.Sequential
        The assembled model; the caller is expected to compile it.
    """
    # Clear any existing TensorFlow graph from memory and set random seeds.
    keras.backend.clear_session()
    np.random.seed(seed)
    tf.random.set_seed(seed)

    model = Sequential()

    # Convolutional feature extractor.
    model.add(layers.Conv2D(filters=64, kernel_size=(7,7), padding='same',
                            input_shape=input_shape, activation='relu'))
    model.add(layers.BatchNormalization(axis=-1))
    model.add(layers.MaxPooling2D(pool_size=(3,3), strides=(2,2)))
    model.add(layers.Conv2D(filters=128, kernel_size=(5,5), strides=(1,1), padding='same', activation='relu'))
    model.add(layers.BatchNormalization(axis=-1))
    model.add(layers.MaxPooling2D(pool_size=(3,3), strides=(2,2)))

    model.add(layers.Conv2D(filters=256, kernel_size=(3,3), strides=(1,1), padding='same', activation='relu'))
    model.add(layers.BatchNormalization(axis=-1))
    model.add(layers.Conv2D(filters=128, kernel_size=(3,3), strides=(1,1), padding='same', activation='relu'))
    model.add(layers.BatchNormalization(axis=-1))
    model.add(layers.MaxPooling2D(pool_size=(2,2), strides=(2,2)))

    # Fully connected classifier head with dropout between the layers.
    model.add(layers.Flatten())
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(units=256, activation='relu'))
    model.add(layers.BatchNormalization())
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(units=128, activation='relu'))
    model.add(layers.BatchNormalization())
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(units=num_classes, activation='softmax'))
    return model


# summary() prints the table itself and returns None, so wrapping it in
# print() would only append a stray "None" line.
cnn_model().summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 28, 28, 64)        3200      
                                                                 
 batch_normalization (BatchN  (None, 28, 28, 64)       256       
 ormalization)                                                   
                                                                 
 max_pooling2d (MaxPooling2D  (None, 13, 13, 64)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 13, 13, 128)       204928    
                                                                 
 batch_normalization_1 (Batc  (None, 13, 13, 128)      512       
 hNormalization)                                                 
                                                        

In [17]:
# compile and fit model
# Grid of learning rates and epoch counts tried for each optimizer.
lr_adam = [0.0001, 0.001]
epoch_adam = [50, 100]

lr_sgd = [0.001, 0.01]
epoch_sgd = [50, 100]

# One entry is appended to every list per trained configuration.
cnn_res = {'tech': [], 'lr':[], 'epoch':[], 'accuracy_score':[], 'precision_score':[], 'recall_score':[], 'f1_score':[]}


def _make_adam(lr, epochs):
    """Return an Adam optimizer; `epochs` is unused and only keeps both factories call-compatible."""
    return keras.optimizers.Adam(learning_rate=lr)


def _make_sgd(lr, epochs):
    """Return momentum SGD whose learning rate decays as lr / (1 + (lr / epochs) * step)."""
    # An explicit schedule replaces the `decay=` constructor argument, which
    # newer Keras optimizers reject; with decay_steps=1 it applies the same
    # inverse-time formula on every optimizer step.
    schedule = keras.optimizers.schedules.InverseTimeDecay(
        initial_learning_rate=lr, decay_steps=1, decay_rate=lr / epochs)
    return keras.optimizers.SGD(learning_rate=schedule, momentum=0.8, nesterov=False)


search_space = [
    ('Adam', lr_adam, epoch_adam, _make_adam),
    ('SGD', lr_sgd, epoch_sgd, _make_sgd),
]

# The true labels do not depend on the configuration, so decode them once.
ground_truth = np.argmax(y_test_keras,axis=1)

# NOTE(review): every configuration is scored on the test set, so picking the
# best row from these results gives an optimistic estimate - consider
# selecting on the validation split instead.
for tech, lrs, epoch_options, make_optimizer in search_space:
    count = 1
    total = len(lrs) * len(epoch_options)
    for lr in lrs:
        for n_epochs in epoch_options:
            # %4s right-aligns the name so "Adam" and " SGD" banners line up.
            print("%4s: %d / %d ###################################################" % (tech, count, total))
            count += 1

            model = cnn_model()
            model.compile(loss="categorical_crossentropy",
                          optimizer=make_optimizer(lr, n_epochs),
                          metrics=["accuracy"])
            model.fit(X_train_keras, y_train_keras, epochs=n_epochs,
                      validation_split=0.2) # set 20% as validation set

            y_prob = model.predict(X_test_keras)
            y_pred = np.argmax(y_prob,axis=1)
            accuracy = accuracy_score(ground_truth, y_pred)

            cnn_res['tech'].append(tech)
            cnn_res['lr'].append(lr)
            cnn_res['epoch'].append(n_epochs)
            cnn_res['accuracy_score'].append(accuracy)
            cnn_res['precision_score'].append(precision_score(ground_truth, y_pred, average='macro'))
            cnn_res['recall_score'].append(recall_score(ground_truth, y_pred, average='macro'))
            cnn_res['f1_score'].append(f1_score(ground_truth, y_pred, average='macro'))
            print("%s with learning rate = %f, epochs = %d, the accuracy is: %f" % (tech, lr, n_epochs, accuracy))

Adam: 1 / 4 ###################################################
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Adam with learning rate = 0.000100, epochs = 50, the accuracy is: 0.896023
Adam: 2 / 4 ###################################################
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100


Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100


Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
Adam with learning rate = 0.000100, epochs = 100, the accuracy is: 0.900230
Adam: 3 / 4 ###################################################
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50


Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Adam with learning rate = 0.001000, epochs = 50, the accuracy is: 0.906104
Adam: 4 / 4 ###################################################
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100


Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100


Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
Adam with learning rate = 0.001000, epochs = 100, the accuracy is: 0.904119
 SGD: 5 / 4 ###################################################
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50


Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
SGD with learning rate = 0.001000, epochs = 50, the accuracy is: 0.892690
 SGD: 6 / 4 ###################################################
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100


Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100


Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
SGD with learning rate = 0.001000, epochs = 100, the accuracy is: 0.902849
 SGD: 7 / 4 ###################################################
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50


Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
SGD with learning rate = 0.010000, epochs = 50, the accuracy is: 0.901024
 SGD: 8 / 4 ###################################################
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100


Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
SGD with learning rate = 0.010000, epochs = 100, the accuracy is: 0.906659


In [18]:
# print result
# Turn the per-configuration scores collected in `cnn_res` into a table; the
# bare expression on the last line is what makes the notebook render it.
new_cnn_res = pd.DataFrame(cnn_res)
new_cnn_res

Unnamed: 0,tech,lr,epoch,accuracy_score,precision_score,recall_score,f1_score
0,Adam,0.0001,50,0.896023,0.898168,0.895615,0.894763
1,Adam,0.0001,100,0.90023,0.900808,0.900084,0.899572
2,Adam,0.001,50,0.906104,0.907161,0.905342,0.905588
3,Adam,0.001,100,0.904119,0.905419,0.903803,0.903309
4,SGD,0.001,50,0.89269,0.892944,0.892368,0.891547
5,SGD,0.001,100,0.902849,0.903077,0.902451,0.902198
6,SGD,0.01,50,0.901024,0.901407,0.900569,0.900407
7,SGD,0.01,100,0.906659,0.90712,0.906179,0.906228


In [21]:
# best model
# Retrain the CNN with the chosen configuration (Adam, learning rate 0.001,
# 50 epochs) and report its metrics on the test set.
model = cnn_model()
model.compile(loss="categorical_crossentropy", 
              optimizer=keras.optimizers.Adam(learning_rate=0.001), 
              metrics=["accuracy"])
# Keep the returned History object so the training curves can be plotted.
history = model.fit(X_train_keras, y_train_keras, epochs=50,
                    validation_split=0.2) # set 20% as validation set

# Convert the predicted probabilities and the one-hot labels to class indices.
y_prob = model.predict(X_test_keras)
y_pred = np.argmax(y_prob,axis=1)
ground_truth = np.argmax(y_test_keras,axis=1)

# The model is a CNN, so the labels say "CNN" (they previously said "MLP").
print("CNN - accuracy score on test set: {:.3f}".format(accuracy_score(ground_truth, y_pred)))
print("CNN - precision score on test set: {:.3f}".format(precision_score(ground_truth, y_pred, average='macro')))
print("CNN - recall score on test set: {:.3f}".format(recall_score(ground_truth, y_pred, average='macro')))
print("CNN - f1 score on test set: {:.3f}".format(f1_score(ground_truth, y_pred, average='macro')))
print("CNN - confusion matrix on test set: \n", confusion_matrix(ground_truth, y_pred))

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
MLP - accuracy score on test set: 0.904
MLP - precision score on test set: 0.905
MLP - recall score on test set: 0.903
MLP - f1 score on test set: 0.903
MLP - confusion matrix on test set: 
 [[164   0   0 ...   0   0   0]
 [  0 173   0 ...   0   0   0]
 [  0   0 211 ...   0   0   0]
 ...
 [  0   0   0 ... 162   0   0]
 [  0   0   0 ...   0 191   0]
 [  0   0   0 ...   0   0 142]]


In [22]:
# save model
# model.save('best_algorithm.h5')

In [23]:
# pd.DataFrame(history.history).plot(figsize=(8,5))
# plt.grid(True)
# plt.gca().set_ylim(0,1)
# plt.show()

## 4. Computer details <a id='4'></a>

- Hardware
    - OS System: Windows 10 64-bit operating system
    - CPU: Intel(R) Core(TM) i7-9700KF
    - GPU: NVIDIA GeForce RTX 2070
    - RAM: 16.0 GB
    
- Software
    - Python 3.8.3
    - notebook 6.0.3

## 5. Easy to use <a id='5'></a>

In [24]:
import sklearn
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
import pandas as pd
import os
import time
import cv2

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator


%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
# read image and resize image
def img_resizing(i_path):
    """Load an image as grayscale and resize it to 28x28 pixels.

    Parameters
    ----------
    i_path : str
        Path of the image file to read.

    Returns
    -------
    numpy.ndarray
        The resized grayscale image, a 28x28 array.

    Raises
    ------
    ValueError
        If OpenCV cannot read or decode the file.
    """
    img = cv2.imread(i_path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread reports failure by returning None instead of raising, which
    # would otherwise only surface as an opaque error inside cv2.resize.
    if img is None:
        raise ValueError("could not read image: %s" % i_path)
    data = cv2.resize(img, (28, 28), interpolation=cv2.INTER_CUBIC)
    return data

# The metric helpers are not among this cell's own imports; importing them
# here lets the cell run standalone in a fresh kernel.
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# converting the images into sets of data
data = []
label = []

data_path = 'English/Fnt/'
for i in range(62):
    # Folders are named Sample001 ... Sample062; the zero-based folder index
    # is used as the class label.
    path = data_path + 'Sample%03d/' % (i+1)
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order, and therefore the seeded train/test split, reproducible.
    for filename in sorted(os.listdir(path)):
        try:
            img = img_resizing(path+filename)
        except Exception as exc:
            # Skip unreadable files, as the loading code in section 1.2 does,
            # so both sections build the same sample list.
            print("failed: ", path + filename, "-", exc)
            continue
        # Flatten each image into a single row vector of pixel values.
        tmp = img.reshape([1, img.shape[0]*img.shape[1]])
        data.append(np.asarray(tmp, dtype = "int32"))
        label.append(i)

print("############### Data Loaded ###############")

# convert data type
data_array = np.asarray(data)
# Collapse the per-image (1, n_pixels) rows into one 2-D sample matrix.
new_data = data_array.reshape(data_array.shape[0],-1)
new_label = np.asarray(label)

# set train/test data
X = new_data
y = new_label
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=500)

# standardize data
scaler = StandardScaler() # creating an object
scaler.fit(X_train) # learn per-feature mean and standard deviation from the training data
X_train_std = scaler.transform(X_train) # apply standardisation to the training set
X_test_std = scaler.transform(X_test) # apply the same standardisation to the test set


# Restore the 28x28 image layout expected by the CNN.
X_train_keras = X_train_std.reshape(X_train_std.shape[0], 28, 28)
X_test_keras = X_test_std.reshape(X_test_std.shape[0], 28, 28)

# Append the single channel axis.
X_train_keras = np.expand_dims(X_train_keras, axis=3)
X_test_keras = np.expand_dims(X_test_keras, axis=3)

# One-hot encode the integer class labels (62 classes).
y_train_keras = keras.utils.to_categorical(y_train, 62).astype('int32')
y_test_keras = keras.utils.to_categorical(y_test, 62).astype('int32')

# load model
print("############### Model Path Input ###############")

# NOTE: this file must already exist, e.g. saved beforehand with
# model.save('best_algorithm.h5').
model_path = 'best_algorithm.h5'
model = load_model(model_path)
print("############### model loaded ###############")

# evaluate model
print("############### Model Evaluation ###############")
# Check pretrained model performance on test set
y_prob = model.predict(X_test_keras)
y_pred = np.argmax(y_prob,axis=1)
ground_truth = np.argmax(y_test_keras,axis=1)

print("CNN - accuracy score on test set: {:.3f}".format(accuracy_score(ground_truth, y_pred)))
print("CNN - precision score on test set: {:.3f}".format(precision_score(ground_truth, y_pred, average='macro')))
print("CNN - recall score on test set: {:.3f}".format(recall_score(ground_truth, y_pred, average='macro')))
print("CNN - f1 score on test set: {:.3f}".format(f1_score(ground_truth, y_pred, average='macro')))
print("CNN - confusion matrix on test set: \n", confusion_matrix(ground_truth, y_pred))

############### Data Path Input ###############
Default path is: 'English/Fnt/'
*** DO NOT ENTER anything if using the above path ***
Windows Path Example: C:/Users/xxx/Downloads/5318/English/Fnt/
MacOS Path Example: /Users/xxx/Downloads/5318/English/Fnt/
Please input the data path (before Samplexxx folders) end with '/'
############### Data Loaded ###############
############### Model Path Input ###############
############### model loaded ###############
############### Model Evaluation ###############
CNN - accuracy score on test set: 0.904
CNN - precision score on test set: 0.905
CNN - recall score on test set: 0.903
CNN - f1 score on test set: 0.903
CNN - confusion matrix on test set: 
 [[164   0   0 ...   0   0   0]
 [  0 173   0 ...   0   0   0]
 [  0   0 211 ...   0   0   0]
 ...
 [  0   0   0 ... 162   0   0]
 [  0   0   0 ...   0 191   0]
 [  0   0   0 ...   0   0 142]]
