In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
import tensorflow_datasets as tfds
%matplotlib inline

# MNIST Dataset
#### General Dataset Info
 * Images are greyscale - the maximum value for a pixel is 255.
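 * These notes assume targets are provided only for the training data, not for the testing data. (The TFDS `mnist` test split loaded below is supervised, so it can still be scored with `model.evaluate`.)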

#### To do List
 * Normalize data - divide each pixel by 255. Cast all entries to float first so that no information is lost in the division.
   * Manually divide each pixel by 255.
 * Shuffle and split the training data: 10% of the 42000 images will be held out as a validation set to detect overfitting.
   * Preprocess data - normalizing and shuffling 
 * Make CNN to analyze the images
   * Use the rectified linear unit (ReLU) activation function for hidden layers.
   * Use softmax function for output layer 
   * The callback will be early stopping: halt training when the validation loss starts to increase.
   * The optimizer will be 'Adam'.
   * The loss function will be categorical cross-entropy (the code uses the sparse variant, `sparse_categorical_crossentropy`).

In [15]:
# Data setup for the CNN: load MNIST from TensorFlow Datasets and work out
# how many examples go into the validation and test sets.

# with_info=True also returns the dataset metadata (used below for split sizes).
mnist_dataset, mnist_info = tfds.load(name='mnist', with_info=True, as_supervised=True)
mnist_train = mnist_dataset['train']
mnist_test = mnist_dataset['test']

# Hold out 10% of the training split for validation. The product is a float,
# so it is cast to an int64 tensor before being used as an element count.
num_validation_samples = tf.cast(
    0.1 * mnist_info.splits['train'].num_examples, tf.int64
)
num_test_samples = tf.cast(mnist_info.splits['test'].num_examples, tf.int64)

# Scale image pixels
def scale(image, label):
    """Cast `image` to float32 and divide it by 255; pass `label` through.

    Written to be used with `Dataset.map`, which calls it once per
    (image, label) element.

    Returns:
        A tuple (scaled_image, label).
    """
    return tf.cast(image, tf.float32) / 255.0, label

# Build the input pipelines: scale pixels, carve a fixed validation set out of
# the training split, then batch everything.
scaled_train_and_validation_data = mnist_train.map(scale)
test_data = mnist_test.map(scale)

BUFFER_SIZE = 10000

# Shuffle once before splitting. reshuffle_each_iteration=False is essential:
# by default the shuffle order changes every time the dataset is iterated
# (i.e. every epoch), so take()/skip() would pick different examples each
# epoch and validation examples would leak into the training set.
shuffled_train_and_validation_data = scaled_train_and_validation_data.shuffle(
    BUFFER_SIZE, reshuffle_each_iteration=False
)

validation_data = shuffled_train_and_validation_data.take(num_validation_samples)
train_data = shuffled_train_and_validation_data.skip(num_validation_samples)


BATCH_SIZE = 100

# Re-shuffle only the training portion on every epoch so mini-batches still
# differ between epochs, then batch it for gradient descent.
train_data = train_data.shuffle(BUFFER_SIZE).batch(BATCH_SIZE)
# Validation and test data are only forward-propagated, so each is one batch
# holding the whole set.
validation_data = validation_data.batch(num_validation_samples)
test_data = test_data.batch(num_test_samples)

# Pull the single validation batch out as tensors for direct inspection.
validation_inputs, validation_targets = next(iter(validation_data))

In [31]:
# Creating the model - a CNN with two Conv2D + MaxPool2D stages, followed by
# two fully connected ReLU layers (1600 and 128 units), dropout of 0.1 and a
# 10-unit softmax output. Mess with hyperparameters later.
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, (3,3), activation='relu', input_shape=(28,28,1)),
    tf.keras.layers.MaxPool2D((2,2)),
    tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
    tf.keras.layers.MaxPool2D((2,2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=1600, activation='relu'),
    tf.keras.layers.Dense(units=128, activation='relu'),
    tf.keras.layers.Dropout(0.1),
    tf.keras.layers.Dense(units=10, activation='softmax'),
])

# The sparse variant of the cross-entropy loss is selected here; accuracy is
# tracked as the only extra metric.
model.compile(
    optimizer='adam',
    loss="sparse_categorical_crossentropy",
    metrics=['accuracy'],
)

In [32]:
# Fitting the data to the network. Hyperparameters can be tuned later.
max_epochs = 100

# Stop once the validation loss has failed to improve for 2 consecutive epochs.
# restore_best_weights=True rolls the model back to the epoch with the lowest
# validation loss; without it the model keeps the weights of the final epoch,
# which is by construction past the best one, and those weights are what get
# evaluated on the test set afterwards.
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=2,
    restore_best_weights=True,
)


model.fit(train_data,
          epochs = max_epochs,
          verbose = 1,
          callbacks = [early_stop],
          validation_data = validation_data
         )

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100


<tensorflow.python.keras.callbacks.History at 0x269199c96c8>

In [29]:
# Evaluate the trained model on the test dataset. The call returns the loss
# followed by the compiled metrics, unpacked here as loss and accuracy.
test_loss, test_accuracy = model.evaluate(x=test_data)

      1/Unknown - 3s 3s/step - loss: 0.0379 - accuracy: 0.9899

In [30]:
# Report the test loss and the test accuracy (as a percentage), both to two decimals.
summary_template = 'Test loss: {0:.2f}. Test accuracy: {1:.2f}%'
accuracy_percent = test_accuracy*100.
print(summary_template.format(test_loss, accuracy_percent))

Test loss: 0.04. Test accuracy: 98.99%
