# Deep Neural Network for MNIST Classification

The dataset is called MNIST and refers to handwritten digit recognition. The dataset provides 70,000 images (28x28 pixels) of handwritten digits (1 digit per image). 

The goal is to write an algorithm that detects which digit is written. Since there are only 10 digits (0, 1, 2, 3, 4, 5, 6, 7, 8, 9), this is a classification problem with 10 classes. 

In [42]:
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

## Data

In [43]:
#Load in the data
#with_info=True provides useful metadata
#as_supervised=True will load the dataset in a 2-tuple structure (input, target)
mnist_data, mnist_info = tfds.load(name='mnist', with_info=True, as_supervised=True)

#create our data variables
mnist_train, mnist_test = mnist_data['train'], mnist_data['test']

In [44]:
mnist_info

tfds.core.DatasetInfo(
    name='mnist',
    full_name='mnist/3.0.1',
    description="""
    The MNIST database of handwritten digits.
    """,
    homepage='http://yann.lecun.com/exdb/mnist/',
    data_dir='C:\\Users\\tobst\\tensorflow_datasets\\mnist\\3.0.1',
    file_format=tfrecord,
    download_size=11.06 MiB,
    dataset_size=21.00 MiB,
    features=FeaturesDict({
        'image': Image(shape=(28, 28, 1), dtype=uint8),
        'label': ClassLabel(shape=(), dtype=int64, num_classes=10),
    }),
    supervised_keys=('image', 'label'),
    disable_shuffling=False,
    nondeterministic_order=False,
    splits={
        'test': <SplitInfo num_examples=10000, num_shards=1>,
        'train': <SplitInfo num_examples=60000, num_shards=1>,
    },
    citation="""@article{lecun2010mnist,
      title={MNIST handwritten digit database},
      author={LeCun, Yann and Cortes, Corinna and Burges, CJ},
      journal={ATT Labs [Online]. Available: http://yann.lecun.com/exdb/mnist},
      volume={

In [45]:
#create variables for data allocation (train, validate, test)
#cast them to int just in case
num_validation_samples = 0.1 * mnist_info.splits['train'].num_examples
num_validation_samples = tf.cast(num_validation_samples, tf.int64)

num_test_samples = mnist_info.splits['test'].num_examples
num_test_samples = tf.cast(num_test_samples, tf.int64)

In [46]:
#scale the data between 0 and 1
def scale(image, label):
    image = tf.cast(image, tf.int64)
    image /= 255 #255 since values range from 0-255
    return image, label

scaled_train_validation = mnist_train.map(scale)
scaled_test = mnist_test.map(scale)

In [47]:
#shuffle the training data
buffer_size = 10000

shuffled_train_validation = scaled_train_validation.shuffle(buffer_size)

#create the train and validation datasets
validation = shuffled_train_validation.take(num_validation_samples)
train = shuffled_train_validation.skip(num_validation_samples)

In [48]:
#create batches for model training and compatability
batch_size = 100

train = train.batch(batch_size)
validation = validation.batch(num_validation_samples)
test = scaled_test.batch(num_test_samples)

In [49]:
#split validation data for validation testing later
#use next(iter()) because validation is a tf.data.Dataset/iterable list of batches since we batched it earlier
validation_inputs, validation_targets = next(iter(validation))

## Model

In [50]:
#create the neural network
input_size = 784 #784 because 28^2
output_size = 10 #10 beause 10 possible targets
hidden_layer_width = 350

model = tf.keras.Sequential([
    #first declare the input layer
    tf.keras.Input(shape=(28, 28, 1)),
    tf.keras.layers.Flatten(), #flatten since our hidden layers can not take 2D objects but 1D, so vectors
    tf.keras.layers.Dense(hidden_layer_width, activation='relu'), #1st hidden layer
    tf.keras.layers.Dense(hidden_layer_width, activation='relu'), #2nd hidden layer
    tf.keras.layers.Dense(output_size, activation='softmax') #output layer, use softmax since we want proportions for classification
])

## Optimization

In [51]:
custom_optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

#adam since it is optimal
#sparse_categorical_crossentropy since our data is not already one-hot encoded
model.compile(custom_optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

## Training

In [52]:
num_epochs = 7

model.fit(train, epochs=num_epochs, validation_data=(validation_inputs, validation_targets), verbose=2)

Epoch 1/7
540/540 - 3s - 6ms/step - accuracy: 0.9309 - loss: 0.2375 - val_accuracy: 0.9638 - val_loss: 0.1122
Epoch 2/7
540/540 - 2s - 4ms/step - accuracy: 0.9731 - loss: 0.0890 - val_accuracy: 0.9783 - val_loss: 0.0769
Epoch 3/7
540/540 - 2s - 4ms/step - accuracy: 0.9817 - loss: 0.0571 - val_accuracy: 0.9817 - val_loss: 0.0542
Epoch 4/7
540/540 - 2s - 4ms/step - accuracy: 0.9859 - loss: 0.0429 - val_accuracy: 0.9882 - val_loss: 0.0398
Epoch 5/7
540/540 - 2s - 4ms/step - accuracy: 0.9897 - loss: 0.0309 - val_accuracy: 0.9855 - val_loss: 0.0418
Epoch 6/7
540/540 - 2s - 4ms/step - accuracy: 0.9916 - loss: 0.0268 - val_accuracy: 0.9923 - val_loss: 0.0290
Epoch 7/7
540/540 - 2s - 4ms/step - accuracy: 0.9930 - loss: 0.0207 - val_accuracy: 0.9907 - val_loss: 0.0298


<keras.src.callbacks.history.History at 0x1f67b33ebf0>

In [53]:
test_loss, test_accuracy = model.evaluate(test)
print('Test loss: {0:.2f}. Test accuracy: {1:.2f}%'.format(test_loss, test_accuracy*100.))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 242ms/step - accuracy: 0.9802 - loss: 0.0724
Test loss: 0.07. Test accuracy: 98.02%
