## Import relevant libraries

In [1]:
import tensorflow as tf
import numpy as np
import tensorflow_datasets as tfds

### Import the MNIST data set

In [2]:
#when loading the data use the with_info argument and as_supervised=True since this will be a supervised learning model
mnist_dataset, mnist_info = tfds.load(name = 'mnist',  with_info=True, as_supervised=True)



### Split the data into train and test

In [3]:
#since we set as_supervised to True, we can split the data to train and test easily
mnist_train, mnist_test = mnist_dataset['train'], mnist_dataset['test']
#since the training data is much larger, we will take the validation data from the training data
mnist_info.splits

{'test': <tfds.core.SplitInfo num_examples=10000>,
 'train': <tfds.core.SplitInfo num_examples=60000>}

### Preprocess the Data set

In [4]:
#set the number of samples in the validation data
num_validation_samples = 0.1 * mnist_info.splits['train'].num_examples

In [5]:
#cast the number of validation samples as integers
num_validation_samples = tf.cast(num_validation_samples, tf.int64)

In [6]:
#set the number of test samples
num_test_samples =mnist_info.splits['test'].num_examples

In [7]:
#cast the number of test samples as integers
num_test_samples = tf.cast(num_test_samples, tf.int64)

In [8]:
num_validation_samples, num_test_samples

(<tf.Tensor: id=282, shape=(), dtype=int64, numpy=6000>,
 <tf.Tensor: id=284, shape=(), dtype=int64, numpy=10000>)

### Scale the the inputs

In [9]:
#image pixels take values from 0 to 255. Scaling them would give values from 0 to 1
#write a function to scale the image files
def scale(image, label):
    image = tf.cast(image, tf.float32)
    image /=255.
    return image, label

In [10]:
#scale the image files
scaled_train_and_validation_data = mnist_train.map(scale)
scaled_test_data = mnist_test.map(scale)

### Set aside the validation data from the training data

In [11]:
# First we need to shuffle the data

#Since MNIST is a large dataset, set a  buffer size for easier shuffling
BUFFER_SIZE = 10000

#shuffle the data
shuffled_train_and_validation_data= scaled_train_and_validation_data.shuffle(BUFFER_SIZE)

# Create the validation data
validation_data = shuffled_train_and_validation_data.take(num_validation_samples)

In [12]:
#Create the train data
train_data = shuffled_train_and_validation_data.skip(num_validation_samples)

### BATCH the data for SGD

In [13]:
#create a batch size variable
BATCH_SIZE = 100

In [14]:
#Batch teh train data
train_data = train_data.batch(BATCH_SIZE)

#Batch sizes for validation and test samples will be equal to the sample sizes
#since they are much smaller and they are not used in training

#Batch teh validation data.
validation_data = validation_data.batch(num_validation_samples)

#Batch teh test data
test_data = scaled_test_data.batch(num_test_samples)

In [15]:
#split the validation data into inputs and outputs
validation_inputs, validation_targets = next(iter(validation_data))

### Outline the ML Model

In [16]:
#input layer is 784x1 matrix since each image is 28x28 = 784
input_size = 784

#Hidden layers with 50 nodes each (feel free to change )
hidden_layer_size = 50

#10 outputs since there are ten digits from 0 to 9
output_size = 10

In [17]:
#create the TF model
model = tf.keras.Sequential([
                            #this is the input layer and it needs to be flattened to 784x1
                            tf.keras.layers.Flatten(input_shape=(28,28,1)),
    
                            #This model will have three hidden layers with RELU activation function
                            tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
                            tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
                            tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
    
                            #This is the output layer, and its size is 10 which will give one-hot-coding
                            tf.keras.layers.Dense(output_size, activation='softmax')
                            ])

### Pick an optimizer and the loss function

In [18]:
#ADAM is the state of the art optimizer that addresses the issues with other optimizers
#since this is a classification problem, our loss function is categorical crossentropy
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

### Train the model

In [19]:
#set the number of epochs for training
NUM_EPOCHS = 10

#the number of validation steps since we batch the training data due to its large size
VALIDATION_STEPS = num_validation_samples/BATCH_SIZE

#train the data
model.fit(train_data, epochs = NUM_EPOCHS, validation_data=(validation_inputs, validation_targets), validation_steps=VALIDATION_STEPS, verbose=2)

Epoch 1/10
540/540 - 12s - loss: 0.4106 - accuracy: 0.8822 - val_loss: 0.0000e+00 - val_accuracy: 0.0000e+00
Epoch 2/10
540/540 - 11s - loss: 0.1738 - accuracy: 0.9484 - val_loss: 0.1507 - val_accuracy: 0.9592
Epoch 3/10
540/540 - 13s - loss: 0.1292 - accuracy: 0.9616 - val_loss: 0.1363 - val_accuracy: 0.9607
Epoch 4/10
540/540 - 13s - loss: 0.1039 - accuracy: 0.9676 - val_loss: 0.1115 - val_accuracy: 0.9697
Epoch 5/10
540/540 - 11s - loss: 0.0901 - accuracy: 0.9723 - val_loss: 0.1026 - val_accuracy: 0.9712
Epoch 6/10
540/540 - 11s - loss: 0.0774 - accuracy: 0.9764 - val_loss: 0.0997 - val_accuracy: 0.9730
Epoch 7/10
540/540 - 11s - loss: 0.0686 - accuracy: 0.9791 - val_loss: 0.0878 - val_accuracy: 0.9738
Epoch 8/10
540/540 - 11s - loss: 0.0590 - accuracy: 0.9814 - val_loss: 0.0712 - val_accuracy: 0.9782
Epoch 9/10
540/540 - 10s - loss: 0.0540 - accuracy: 0.9834 - val_loss: 0.0748 - val_accuracy: 0.9788
Epoch 10/10
540/540 - 12s - loss: 0.0481 - accuracy: 0.9853 - val_loss: 0.0664 - va

<tensorflow.python.keras.callbacks.History at 0x168c16e6cc8>

In [20]:
#Our model is learning since the loss function is decreasing
# Now lets test our model
test_loss, test_accuracy = model.evaluate(test_data)

print('Test loss: {0:.2f}. Test accuracy: {1:.2f}%'.format(test_loss, test_accuracy*100.))



### Our model predicted the outcome correctly 97% of the time.