**Import the relevant packages**

In [3]:
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
import warnings

# Silence every Python warning for the rest of the session to keep cell output short.
# Note that this is a blanket filter: it hides deprecation warnings as well.
warnings.filterwarnings('ignore')

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Record the installed tensorflow_datasets version alongside the results.
print(tfds.__version__)

4.9.2


**Data**

In [6]:
# Load MNIST through TFDS.
# with_info=True makes the call return the dataset metadata as a second value;
# as_supervised=True yields each element as an (image, label) pair.
mnist_dataset, mnist_info = tfds.load(
    name='mnist',
    as_supervised=True,
    with_info=True,
)

**Extract train and test data**

In [8]:
# Pull the two predefined splits out of the object returned by tfds.load.
mnist_train = mnist_dataset['train']
mnist_test = mnist_dataset['test']

**Take an arbitrary percentage of the training data to serve as validation**

In [10]:
# Hold out 10% of the training split for validation.
# The product is a float, so it is cast to an integer tensor before being used as a count.
train_example_count = mnist_info.splits['train'].num_examples
num_validation_samples = tf.cast(0.1 * train_example_count, tf.int64)

In [11]:
# Keep the size of the test split as an int64 tensor in its own variable.
num_test_samples = tf.cast(mnist_info.splits['test'].num_examples, tf.int64)

**Scale the data**

In [13]:
# Inputs between 0 and 1 make training more numerically stable.
# Any scaling can be used here, as long as the function takes (image, label)
# and returns (image, label).
def scale(image, label):
    """Convert an image to float32 and divide it by 255; the label is returned unchanged.

    For 8-bit pixel values (0-255) the result lies in the range [0, 1].
    """
    normalized_image = tf.cast(image, tf.float32) / 255.  # the trailing dot keeps the divisor a float
    return normalized_image, label

In [14]:
# Apply scale() to every (image, label) pair of the training split.
# The result still contains the samples that are later held out for validation.
scaled_train_and_validation_data = mnist_train.map(map_func=scale)

In [15]:
# Scale the test split with the same function (batching happens in a later cell).
test_data = mnist_test.map(map_func=scale)

**Shuffle the data and create the validation dataset**

In [17]:
# Shuffling keeps the same samples but puts them in a different order, so that
# the validation hold-out and the mini-batches are as randomly spread as possible.
#
# BUFFER_SIZE is the size of the shuffle buffer; it matters for enormous datasets
# that cannot be shuffled all at once.
BUFFER_SIZE = 10000

# reshuffle_each_iteration=False freezes the shuffled order. With the default (True)
# the order changes on every pass over the dataset, so the take()/skip() split made
# in the following cells would select different samples each epoch and validation
# samples would leak into the training data.
# Trade-off: the training samples are then visited in the same order every epoch
# unless train_data is shuffled again after the split.
shuffled_train_and_validation_data = scaled_train_and_validation_data.shuffle(
    BUFFER_SIZE, reshuffle_each_iteration=False
)

In [18]:
# The first num_validation_samples elements of the shuffled data form the validation set.
validation_data = shuffled_train_and_validation_data.take(count=num_validation_samples)

In [19]:
# Everything after the first num_validation_samples elements (the validation hold-out)
# is used for training.
train_data = shuffled_train_and_validation_data.skip(count=num_validation_samples)

In [20]:
# The model is trained with mini-batch gradient descent, so the data must be batched:
#   batch size = 1                      -> stochastic gradient descent (SGD)
#   batch size = number of samples      -> single-batch gradient descent
#   1 < batch size < number of samples  -> mini-batch gradient descent
BATCH_SIZE = 100

# The validation and test sets are each served as one batch holding the whole set;
# only the training set is cut into mini-batches.
# Note: the three names are rebound in place, so running this cell a second time
# would batch the already-batched datasets again.
test_data = test_data.batch(batch_size=num_test_samples)
validation_data = validation_data.batch(batch_size=num_validation_samples)
train_data = train_data.batch(batch_size=BATCH_SIZE)

In [21]:
# iter() only creates an iterator over the dataset; next() is what actually loads
# a batch. Because the validation set was batched into a single batch, that one
# call returns all validation inputs and targets.
validation_iterator = iter(validation_data)
validation_inputs, validation_targets = next(validation_iterator)

**Model**

**Outline the model**

In [24]:
# Layer widths used when outlining the model.
input_size = 784  # 28 * 28 * 1 flattened pixels; the visible model cell does not reference this name (Flatten infers it from input_shape)
output_size = 10  # width of the final softmax layer, i.e. one unit per class
hidden_layer_size = 200 #the underlying assumption is that all hidden layers are of the same size

In [25]:
# Feed-forward classifier: flatten each (28, 28, 1) image, pass it through two
# hidden layers of equal width (relu, then tanh) and finish with a softmax layer
# that has one unit per class.
mnist_layers = [
    tf.keras.layers.Flatten(input_shape=(28, 28, 1)),
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
    tf.keras.layers.Dense(hidden_layer_size, activation='tanh'),
    tf.keras.layers.Dense(output_size, activation='softmax'),
]
model = tf.keras.Sequential(mnist_layers)

**Choose the optimizer and loss function**

In [27]:
# Adam optimizer with sparse categorical cross-entropy, which works on integer
# class labels directly; accuracy is tracked as an additional metric.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

**Training**

In [29]:
# Number of epochs (full passes over train_data) passed to model.fit
NUM_EPOCHS = 5

In [30]:
# Train on the mini-batched training data and evaluate on the pre-loaded
# validation tensors after each epoch; verbose=2 prints one summary line per epoch.
model.fit(
    train_data,
    epochs=NUM_EPOCHS,
    validation_data=(validation_inputs, validation_targets),
    verbose=2,
)

Epoch 1/5
540/540 - 6s - loss: 0.2529 - accuracy: 0.9242 - val_loss: 0.1206 - val_accuracy: 0.9652 - 6s/epoch - 11ms/step
Epoch 2/5
540/540 - 4s - loss: 0.0981 - accuracy: 0.9693 - val_loss: 0.0820 - val_accuracy: 0.9748 - 4s/epoch - 7ms/step
Epoch 3/5
540/540 - 4s - loss: 0.0657 - accuracy: 0.9795 - val_loss: 0.0667 - val_accuracy: 0.9792 - 4s/epoch - 7ms/step
Epoch 4/5
540/540 - 4s - loss: 0.0475 - accuracy: 0.9848 - val_loss: 0.0452 - val_accuracy: 0.9870 - 4s/epoch - 7ms/step
Epoch 5/5
540/540 - 3s - loss: 0.0362 - accuracy: 0.9884 - val_loss: 0.0461 - val_accuracy: 0.9852 - 3s/epoch - 6ms/step


<keras.src.callbacks.History at 0x2a71573b5e0>

**Test the model**

In [35]:
# evaluate() returns the loss followed by the metrics given to compile() (here: accuracy).
evaluation_results = model.evaluate(test_data)
test_loss, test_accuracy = evaluation_results



In [37]:
# Report both numbers with two decimals; the accuracy is shown as a percentage.
report_template = 'Test loss: {0:.2f}. Test accuracy: {1:.2f}%'
print(report_template.format(test_loss, test_accuracy*100.))

Test loss: 0.07. Test accuracy: 97.72%


**Save the model**

In [39]:
# Write the trained model to mnist_model.h5 (relative path, so it lands in the working directory).
# include_optimizer=False leaves the optimizer state out of the saved file.
model.save("mnist_model.h5", include_optimizer=False)