# Imports

In [103]:
# Fix the random seeds before anything else runs.
# Seed NumPy's global random generator:
from numpy.random import seed
seed(888)
# Seed TensorFlow's global generator. Note: `random` here is tensorflow.random,
# not the standard-library `random` module.
from tensorflow import random
random.set_seed(404)

In [104]:
import os
import numpy as np
import tensorflow as tf

from time import strftime
from PIL import Image

# Constants

In [105]:
# --- Image geometry -------------------------------------------------------
# Each image is IMAGE_HEIGHT x IMAGE_WIDTH with CHANNELS channel(s);
# TOTAL_INPUTS is the length of one flattened image.
IMAGE_HEIGHT = 28
IMAGE_WIDTH = 28
CHANNELS = 1
TOTAL_INPUTS = IMAGE_WIDTH * IMAGE_HEIGHT * CHANNELS

# --- Problem size ---------------------------------------------------------
NR_CLASSES = 10
VALIDATION_SIZE = 10_000

# --- Data files (x = pixels, y = labels) ----------------------------------
X_TRAIN_PATH = 'MNIST/digit_xtrain.csv'
Y_TRAIN_PATH = 'MNIST/digit_ytrain.csv'
X_TEST_PATH = 'MNIST/digit_xtest.csv'
Y_TEST_PATH = 'MNIST/digit_ytest.csv'

# --- Logging --------------------------------------------------------------
LOGGING_PATH = 'tensorboard_mnist_digit_logs/'

# Get the Data

In [106]:
%%time

# Training labels, parsed as integers from the comma-separated file
y_train_all = np.loadtxt(fname=Y_TRAIN_PATH, dtype=int, delimiter=',')

CPU times: user 3.96 ms, sys: 1.89 ms, total: 5.85 ms
Wall time: 5.02 ms


In [107]:
y_train_all.shape

(60000,)

In [108]:
# Test labels, parsed as integers from the comma-separated file
y_test = np.loadtxt(fname=Y_TEST_PATH, dtype=int, delimiter=',')

In [109]:
%%time

# Training pixels, parsed as integers: one flattened image per CSV row
x_train_all = np.loadtxt(fname=X_TRAIN_PATH, dtype=int, delimiter=',')

CPU times: user 1.56 s, sys: 201 ms, total: 1.76 s
Wall time: 1.76 s


In [110]:
%%time

# Test pixels, parsed as integers: one flattened image per CSV row
x_test = np.loadtxt(fname=X_TEST_PATH, dtype=int, delimiter=',')

CPU times: user 263 ms, sys: 40.6 ms, total: 303 ms
Wall time: 302 ms


# Explore

In [111]:
x_train_all.shape

(60000, 784)

In [112]:
x_train_all[0]

array([  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   3,  18,  18,  18,
       126, 136, 175,  26, 166, 255, 247, 127,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,  30,  36,  94, 154, 17

In [113]:
y_train_all.shape

(60000,)

In [114]:
x_test.shape

(10000, 784)

In [115]:
y_train_all[:5]

array([5, 0, 4, 1, 9])

# Data Preprocessing

In [116]:
# Re-scale pixel values by 1/255.
# The raw arrays are loaded with dtype=int, so an integer dtype means "not yet
# scaled". The guard keeps this cell idempotent: without it, running the cell
# a second time would silently divide the already-scaled data by 255 again.
if np.issubdtype(x_train_all.dtype, np.integer):
    x_train_all = x_train_all / 255.0
if np.issubdtype(x_test.dtype, np.integer):
    x_test = x_test / 255.0

### Convert target values to one-hot encoding

In [117]:
# Demonstrate one-hot encoding on the first five labels: indexing the 10x10
# identity matrix with a label selects the row whose single 1 sits at that
# label's position.
values = y_train_all[:5]
np.eye(10)[values]

array([[0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]])

In [118]:
np.eye(10)

array([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]])

In [119]:
np.eye(10)[2]

array([0., 0., 1., 0., 0., 0., 0., 0., 0., 0.])

In [120]:
values

array([5, 0, 4, 1, 9])

In [121]:
values[4]

9

In [122]:
# One-hot encode the training labels by indexing the identity matrix.
# Encode only while the labels are still a 1-D vector of class ids, so that
# re-running this cell is a no-op instead of raising IndexError on the
# already-encoded (2-D, float) array.
if y_train_all.ndim == 1:
    y_train_all = np.eye(NR_CLASSES)[y_train_all]

In [123]:
y_train_all.shape

(60000, 10)

In [124]:
# One-hot encode the test labels by indexing the identity matrix.
# Encode only while the labels are still a 1-D vector of class ids, so that
# re-running this cell is a no-op instead of raising IndexError on the
# already-encoded (2-D, float) array.
if y_test.ndim == 1:
    y_test = np.eye(NR_CLASSES)[y_test]
y_test.shape

(10000, 10)

## Create validation dataset from training data

In [125]:
# Hold out the first VALIDATION_SIZE examples (pixels and labels) for validation
x_val, y_val = x_train_all[:VALIDATION_SIZE], y_train_all[:VALIDATION_SIZE]

In [126]:
# Everything after the validation slice is used for training
x_train, y_train = x_train_all[VALIDATION_SIZE:], y_train_all[VALIDATION_SIZE:]

In [127]:
x_train.shape

(50000, 784)

In [128]:
x_val.shape

(10000, 784)

In [129]:
class LogImages(tf.keras.callbacks.Callback):
    """Keras callback that writes a few input images to TensorBoard after each epoch.

    Args:
        log_dir: Base log directory; image summaries are written to its
            ``images`` sub-directory.
        x_data: Array of flattened images. The first ``max_images`` rows are
            logged at the end of every epoch.
        max_images: How many images to log per epoch. Defaults to 4.
        image_shape: ``(height, width, channels)`` used to un-flatten each row.
            Defaults to ``(IMAGE_HEIGHT, IMAGE_WIDTH, CHANNELS)`` from the
            notebook's constants cell.
    """

    def __init__(self, log_dir, x_data, max_images=4, image_shape=None):
        super().__init__()
        if image_shape is None:
            # Fall back to the notebook-wide image geometry constants
            image_shape = (IMAGE_HEIGHT, IMAGE_WIDTH, CHANNELS)
        self.log_dir = log_dir
        self.x_data = x_data
        self.max_images = max_images
        self.image_shape = tuple(image_shape)
        # One writer is created on first use and reused for every epoch,
        # instead of opening a new (never closed) writer per epoch.
        self._file_writer = None

    def _get_file_writer(self):
        """Return the summary writer, creating it on first use."""
        if self._file_writer is None:
            # os.path.join avoids a doubled separator when log_dir ends with '/'
            self._file_writer = tf.summary.create_file_writer(
                os.path.join(self.log_dir, 'images'))
        return self._file_writer

    def on_epoch_end(self, epoch, logs=None):
        """Log the first ``max_images`` images, using the epoch index as the step."""
        images = np.reshape(self.x_data[:self.max_images], (-1, *self.image_shape))
        file_writer = self._get_file_writer()

        with file_writer.as_default():
            tf.summary.image(f"{self.max_images} training data examples", images,
                             max_outputs=self.max_images, step=epoch)
        # Push the event to disk so TensorBoard can show it while training runs
        file_writer.flush()

    def on_train_end(self, logs=None):
        """Close the writer when training finishes; a later fit() gets a fresh one."""
        if self._file_writer is not None:
            self._file_writer.close()
            self._file_writer = None

# Load the Test Image

In [130]:
from tensorflow.keras.preprocessing.image import img_to_array, load_img

# Relative to the notebook's working directory, like the CSV paths in the
# constants cell, so the notebook does not depend on one user's home folder.
img_path = 'MNIST/test_img.png'

# Load as a single-channel image resized to the network's input resolution
# (target_size is given as (height, width)).
img = load_img(img_path, color_mode='grayscale', target_size=(IMAGE_HEIGHT, IMAGE_WIDTH))

# Scale pixel values by 1/255, the same scaling applied to the CSV pixel data
img_array = img_to_array(img) / 255.0

# Flatten to a 1-D vector so it can be fed to the Dense input layer
test_img = img_array.flatten()


# Setup Tensorflow Graph

In [131]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout

# --- Hyperparameters for this quick run ---
n_hidden1 = 512   # units in the first hidden layer
n_hidden2 = 64    # units in the second hidden layer
NR_CLASSES = 10   # width of the softmax output
nr_epochs = 10    # passes over the training data

# --- Architecture: dense -> dropout -> dense -> softmax, added layer by layer ---
model = Sequential()
model.add(Dense(n_hidden1, activation='relu', input_shape=(TOTAL_INPUTS,), name='layer_1'))
model.add(Dropout(0.2))
model.add(Dense(n_hidden2, activation='relu', name='layer_2'))
model.add(Dense(NR_CLASSES, activation='softmax', name='output_layer'))

# --- Loss / optimiser / metric ---
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Layer-by-layer overview of the network
model.summary()

# --- Fit, validating on the held-out split ---
history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=1000,
    epochs=nr_epochs,
    validation_data=(x_val, y_val),
)

# --- Score on the test split ---
test_loss, test_accuracy = model.evaluate(x=x_test, y=y_test)
print(f"Test loss: {test_loss}, Test accuracy: {test_accuracy}")

# --- Classify the single test image (fed as a batch of one) ---
predictions = model.predict(test_img[np.newaxis, :])
predicted_class = predictions.argmax(axis=1)
print(f"Prediction for test image is {predicted_class}")


Model: "sequential_10"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 layer_1 (Dense)             (None, 512)               401920    
                                                                 
 dropout_9 (Dropout)         (None, 512)               0         
                                                                 
 layer_2 (Dense)             (None, 64)                32832     
                                                                 
 output_layer (Dense)        (None, 10)                650       
                                                                 
Total params: 435402 (1.66 MB)
Trainable params: 435402 (1.66 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test loss: 0.0708184540271759, Test ac

## Neural Network Architecture

In [132]:
# Optimisation settings
learning_rate = 1e-3
nr_epochs = 50

# Widths of the two hidden layers
n_hidden1, n_hidden2 = 512, 64

In [133]:
# Assemble the network one layer at a time: dense -> dropout -> dense -> softmax
model = Sequential()
model.add(Dense(n_hidden1, activation='relu', input_shape=(TOTAL_INPUTS,), name='layer_1'))
model.add(Dropout(0.2))
model.add(Dense(n_hidden2, activation='relu', name='layer_2'))
model.add(Dense(NR_CLASSES, activation='softmax', name='output_layer'))

# Adam optimiser, categorical cross-entropy loss, accuracy as the reported metric
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)


# TensorBoard Setup

In [134]:
# Load the TensorBoard notebook extension and open the dashboard inline.
# NOTE(review): the dashboard reads from logs/fit, while LOGGING_PATH in the
# constants cell is 'tensorboard_mnist_digit_logs/', and none of the fit()
# calls visible in this notebook pass a logging callback. Confirm which
# directory is intended and whether logging should be wired in.
%load_ext tensorboard
%tensorboard --logdir logs/fit



The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6007 (pid 19019), started 0:31:06 ago. (Use '!kill 19019' to kill it.)

# Loss, Optimisation & Metrics

In [135]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

# Layer widths (n_hidden1, n_hidden2) and learning_rate come from the
# "Neural Network Architecture" cell; NR_CLASSES and TOTAL_INPUTS come from the
# constants cell. They are deliberately not re-declared here: a hard-coded
# TOTAL_INPUTS = 784 would silently override the value derived from the image
# dimensions if those were ever changed.

# Define the Model: dense -> dropout -> dense -> softmax over the classes
model = Sequential([
    Dense(n_hidden1, activation='relu', input_shape=(TOTAL_INPUTS,), name='layer_1'),
    Dropout(0.2),
    Dense(n_hidden2, activation='relu', name='layer_2'),
    Dense(NR_CLASSES, activation='softmax', name='output_layer')
])

# Compile the Model. The optimiser is built explicitly so that the declared
# learning_rate is actually used (1e-3 is also Adam's default, so results are
# unchanged until the hyperparameter is edited).
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Display model's architecture
model.summary()


Model: "sequential_12"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 layer_1 (Dense)             (None, 512)               401920    
                                                                 
 dropout_11 (Dropout)        (None, 512)               0         
                                                                 
 layer_2 (Dense)             (None, 64)                32832     
                                                                 
 output_layer (Dense)        (None, 10)                650       
                                                                 
Total params: 435402 (1.66 MB)
Trainable params: 435402 (1.66 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


# Run Session

In [136]:
# Define the batch size for training
size_of_batch = 1000 

history = model.fit(x_train, y_train,
                    epochs=nr_epochs,
                    batch_size=size_of_batch,
                    validation_data=(x_val, y_val))

print(history.history)

# Evaluate the Model on the test dataset
test_loss, test_accuracy = model.evaluate(x_test, y_test)
print(f'Test Loss: {test_loss}, Test Accuracy: {test_accuracy}')


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
{'loss': [0.6179391145706177, 0.2315639704465866, 0.1676628738641739, 0.1329767256975174, 0.10681363940238953, 0.09137571603059769, 0.0748787671327591, 0.06463319063186646, 0.05599426105618477, 0.04746713861823082, 0.04189279302954674, 0.037807583808898926, 0.03188468888401985, 0.02831592783331871, 0.025708021596074104, 0.023084457963705063, 0.020959068089723587, 0.018151551485061646, 0.016252972185611725,

Test Loss: 0.07373380661010742, Test Accuracy: 0.9821000099182129


# Make a Prediction

In [137]:
from tensorflow.keras.preprocessing.image import img_to_array, load_img

# Read the sample digit as a single-channel image resized to 28x28
img = load_img('MNIST/test_img.png', target_size=(28, 28), color_mode='grayscale')

# Pixel array scaled by 1/255, the same scaling applied to the training data
img_array = img_to_array(img) / 255.0

# If the image's polarity is the opposite of the training data, invert it first:
# img_array = 1.0 - img_array

# The network takes flat vectors, so collapse the image to 1-D
test_img = img_array.flatten()

# Predict on a batch of one and keep the most probable class
predictions = model.predict(test_img[np.newaxis, :])
predicted_class = predictions.argmax(axis=1)
print(f'Prediction for test image is {predicted_class}')


Prediction for test image is [5]


In [138]:
# Evaluate the model on the test dataset
test_loss, test_accuracy = model.evaluate(x_test, y_test)
print(f'Accuracy on test set is {test_accuracy:.2%}')


Accuracy on test set is 98.21%
