**STEP 1:** Preliminary language-specific commands

In [56]:
import os
import numpy as np # same modules as before
import pandas as pd
from sklearn import neural_network, model_selection, metrics

import tensorflow as tf # for deep learning modules
from keras.datasets import mnist
from keras.layers import Conv2D, Dense, Flatten, MaxPooling2D
from keras.models import Sequential
from keras.utils import np_utils
from tensorflow.keras import optimizers, metrics

**STEP 2:** Load the data

**STEP 5:** Shuffle the samples and split into train and test (step order based on run_ann.py)

In [57]:
# mnist.load_data() hands back the data already divided into a training pair
# and a testing pair, so no manual shuffle/split is performed here
train_split, test_split = mnist.load_data()
train_in, train_out = train_split
test_in, test_out = test_split

**STEP 4:** Scale and reshape the data

In [58]:
# one class per digit (10 unique digits)
NUM_CLASSES = 10
# divisor used to scale the pixel values
PIXEL_SCALE = 255

# NOTE: this cell overwrites its own inputs (train_in, test_in, train_out,
# test_out), so re-run the loading cell before running it a second time.

# The Keras API needs four-dimensional input: append a trailing axis of
# length 1, convert to float32, then scale.
train_in = np.expand_dims(train_in, axis=-1).astype("float32") / PIXEL_SCALE
test_in = np.expand_dims(test_in, axis=-1).astype("float32") / PIXEL_SCALE

# Convert the label vectors with to_categorical, one column per class
train_out = np_utils.to_categorical(train_out, NUM_CLASSES)
test_out = np_utils.to_categorical(test_out, NUM_CLASSES)

**STEP 3:** Determine the CNN hyperparameters

In [59]:
# Seed NumPy and TensorFlow before any layer is created so repeated runs start
# from the same random state. This is best-effort: depending on the Keras/TF
# version and hardware, some run-to-run variation may remain.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# here, we must build each layer of the CNN
cnn = Sequential()
# convolutional feature extractor; the first layer declares the input shape
cnn.add(Conv2D(8, (3, 3), activation="relu", input_shape=(28, 28, 1)))
cnn.add(Conv2D(16, (3, 3), activation="relu"))
cnn.add(MaxPooling2D(pool_size=(2,2)))
cnn.add(Conv2D(32, (3, 3), activation="relu"))
cnn.add(MaxPooling2D(pool_size=(2,2)))
cnn.add(Conv2D(64, (3, 3), activation="relu"))
cnn.add(MaxPooling2D(pool_size=(2,2)))
cnn.add(Conv2D(128, (1, 1), activation="relu"))
# NOTE(review): a 1x1 pooling window looks like a no-op -- confirm it is intended
cnn.add(MaxPooling2D(pool_size=(1,1)))
# classifier head: flatten, one hidden dense layer, then a 10-way softmax output
cnn.add(Flatten())
cnn.add(Dense(128, activation="relu"))
cnn.add(Dense(10, activation="softmax"))

method = optimizers.SGD(learning_rate=0.01) # set training method and learning rate

**STEP 6:** Train the CNN

In [60]:
# training settings
EPOCHS = 20
BATCH_SIZE = 128

# Cross-entropy is used as the loss and accuracy as the reported metric.
# F1 score is not available, but precision and recall can be obtained and
# F1 calculated from them manually.
cnn.compile(
    loss="categorical_crossentropy",
    optimizer=method,
    metrics=["accuracy"],
)

cnn.fit(train_in, train_out, batch_size=BATCH_SIZE, epochs=EPOCHS)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7fa2aebeedd0>

**STEP 7:** Predict training outputs

In [61]:
# run the trained network over every training sample
pred_train_out = cnn.predict(train_in)

# wrap the predicted training outputs in a table and show it
train_compare = pd.DataFrame(data=pred_train_out)
display(train_compare)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,1.115588e-08,3.265633e-08,4.787224e-06,6.656150e-01,5.628019e-12,3.341227e-01,6.575051e-09,6.711620e-06,2.506441e-04,2.524777e-07
1,9.999416e-01,6.733116e-08,8.669520e-06,1.009173e-07,5.366571e-09,3.168864e-06,3.840189e-06,1.727582e-06,1.000186e-07,4.067512e-05
2,1.427577e-03,6.090386e-04,2.154811e-02,2.382792e-02,9.207859e-01,3.630320e-03,1.428003e-03,2.068520e-02,1.268219e-04,5.931166e-03
3,1.061620e-04,9.949771e-01,4.323366e-04,3.053069e-05,5.241856e-05,1.743015e-05,3.166250e-05,3.711804e-03,6.375764e-04,3.054704e-06
4,4.056609e-08,1.533615e-09,2.564194e-07,3.607500e-04,1.653852e-04,1.233118e-06,1.162587e-11,5.280921e-04,8.132640e-05,9.988630e-01
...,...,...,...,...,...,...,...,...,...,...
59995,5.749417e-07,3.680501e-09,1.305986e-04,4.088492e-04,3.328506e-10,8.843237e-05,2.206608e-08,4.458199e-07,9.993693e-01,1.844765e-06
59996,3.664644e-12,1.176913e-13,3.565455e-07,9.999987e-01,2.658048e-13,3.739652e-07,9.860968e-15,8.466590e-08,5.947938e-09,4.737621e-07
59997,1.361831e-09,1.065894e-09,2.928681e-09,1.111233e-04,1.004415e-09,9.997563e-01,1.769618e-10,2.688388e-07,7.732836e-05,5.513980e-05
59998,2.786225e-04,9.000942e-08,6.891226e-06,2.093469e-06,1.539828e-06,5.090727e-05,9.996290e-01,1.297061e-07,2.917101e-05,1.611999e-06


**STEP 8:** Get the training score

In [62]:
# evaluate() result: index 0 is read as the loss, index 1 as the accuracy
train_score = cnn.evaluate(train_in, train_out, verbose=0)
train_loss = train_score[0]
train_accuracy = train_score[1]

# NOTE(review): the loss is reported as 100 * its raw value under a "(%)" label;
# confirm that a percentage is really what is meant for a cross-entropy loss.
train_results = [
    "Training Loss (%): ",
    str(100 * train_loss),
    "Training Accuracy (%): ",
    str(100 * train_accuracy),
]
print("\n".join(train_results))

Training Loss (%): 
6.0118477791547775
Training Accuracy (%): 
98.11999797821045


**STEP 9:** Predict testing outputs

In [63]:
# run the trained network over every testing sample
pred_test_out = cnn.predict(test_in)

# wrap the predicted testing outputs in a table and show it
test_compare = pd.DataFrame(data=pred_test_out)
display(test_compare)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,3.144344e-07,1.200504e-06,3.738206e-04,2.150646e-05,2.157044e-09,3.433117e-08,2.995451e-13,9.995843e-01,5.042979e-06,1.385890e-05
1,1.826301e-06,8.893188e-07,9.999961e-01,7.836410e-07,1.386659e-10,2.963249e-11,8.219578e-09,1.220322e-07,2.077228e-07,5.697527e-11
2,2.648095e-04,9.957954e-01,8.281107e-04,2.642601e-05,8.399163e-04,6.067457e-05,1.188723e-04,1.558103e-03,4.958194e-04,1.193634e-05
3,9.996787e-01,1.945327e-09,3.126984e-04,7.437103e-09,1.178236e-10,1.344628e-07,8.304470e-07,5.005858e-06,6.924887e-09,2.670252e-06
4,2.659843e-06,2.031476e-06,2.354828e-06,4.940363e-07,9.998577e-01,1.307515e-07,2.384867e-07,2.743699e-05,5.929312e-07,1.064571e-04
...,...,...,...,...,...,...,...,...,...,...
9995,3.661478e-09,4.738850e-08,9.999990e-01,7.631170e-07,5.574019e-14,1.879282e-13,1.715947e-12,2.077902e-07,1.349256e-08,6.804421e-12
9996,1.627973e-10,1.537567e-09,1.581338e-06,9.999977e-01,1.030541e-11,6.234504e-07,2.415143e-13,2.452763e-10,6.895714e-08,3.358562e-08
9997,2.075397e-08,9.225751e-08,4.332597e-09,1.032651e-09,9.999968e-01,3.921699e-10,4.208083e-10,4.734256e-07,6.339324e-07,2.037304e-06
9998,9.890923e-06,1.056685e-10,5.141745e-07,6.307020e-08,2.922839e-12,9.999030e-01,7.164807e-06,1.220007e-07,7.915602e-05,8.260148e-08


**STEP 10:** Get the testing score

In [64]:
# evaluate() result: index 0 is read as the loss, index 1 as the accuracy
test_score = cnn.evaluate(test_in, test_out, verbose=0)
test_loss = test_score[0]
test_accuracy = test_score[1]

# NOTE(review): the loss is reported as 100 * its raw value under a "(%)" label;
# confirm that a percentage is really what is meant for a cross-entropy loss.
test_results = [
    "Testing Loss (%): ",
    str(100 * test_loss),
    "Testing Accuracy (%): ",
    str(100 * test_accuracy),
]
print("\n".join(test_results))

Testing Loss (%): 
7.47632309794426
Testing Accuracy (%): 
97.87999987602234


**STEP 11:** Save evaluation results and outputs to a file

In [65]:
# one row per data set: [loss label, loss value, accuracy label, accuracy value]
results = np.array([train_results, test_results])
results_file = pd.DataFrame(results)

# predicted values versus actual values on training data
# (rows are stacked: all predicted rows first, followed by all actual rows)
train_compare = pd.DataFrame(np.vstack((pred_train_out, train_out)))
# predicted values versus actual values on testing data (same stacked layout)
test_compare = pd.DataFrame(np.vstack((pred_test_out, test_out)))

# filepath to "Saved Files" folder
savedir = "Saved Files" + os.sep
# create the folder if it is missing; to_csv fails on a non-existent directory
os.makedirs(savedir, exist_ok=True)

# export evaluation results
results_file.to_csv(os.path.join(savedir, "score.csv"), index=False, header=False)
# export training outputs
train_compare.to_csv(os.path.join(savedir, "Predicted Training Outputs.csv"), index=False, header=False)
# export test outputs
test_compare.to_csv(os.path.join(savedir, "Predicted Test Outputs.csv"), index=False, header=False)

**STEP 12:** Display results to the console

In [66]:
# print every label/value of each result row on its own line
for result_row in results:
    print("\n".join(result_row))

Training Loss (%): 
6.0118477791547775
Training Accuracy (%): 
98.11999797821045
Testing Loss (%): 
7.47632309794426
Testing Accuracy (%): 
97.87999987602234
