# Deep Neural Network for MNIST Classification

The dataset provides 70,000 images (28x28 pixels) of handwritten digits (1 digit per image). 

The goal is to write an algorithm that detects which digit is written. Since there are only 10 digits (0, 1, 2, 3, 4, 5, 6, 7, 8, 9), this is a classification problem with 10 classes. 


In [9]:
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

## Data

In [2]:
# tfds.load(name, with_info, as_supervised) - loads a dataset from TensorFlow datasets 
# as_supervised=True - loads the data in a 2-tuple structure[input, target]
# with_info = True - provides a tuple containing info about version, features, # of datasets
mnist_dataset, mnist_info = tfds.load(name='mnist', with_info=True, as_supervised=True)

In [3]:
mnist_train, mnist_test = mnist_dataset['train'], mnist_dataset['test']
# numbber of train sets are 600000 out of 700000

num_validation_samples = 0.1 * mnist_info.splits['train'].num_examples
# casts a variable into a given data type
num_validation_samples = tf.cast(num_validation_samples, tf.int64)

num_test_samples = mnist_info.splits['test'].num_examples
num_test_samples = tf.cast(num_test_samples, tf.int64)


## scale inputs between 0 and 1
def scale(image, label):
    image = tf.cast(image, tf.float32)
    image /= 255. 
    return image, label

scaled_train_and_validation_data = mnist_train.map(scale)

# scale test data
test_data = mnist_test.map(scale)


## shuffle the data
# we cant shuffle a large data at once
BUFFER_SIZE = 10000

shuffled_train_and_validation_data = scaled_train_and_validation_data.shuffle(BUFFER_SIZE)

validation_data = shuffled_train_and_validation_data.take(num_validation_samples)
train_data = shuffled_train_and_validation_data.skip(num_validation_samples)

# batch size = 1 - SGD
# batch size = number of samples - (single batch) GD
# 1 < batch size < # samples - mini-batch GD
BATCH_SIZE = 100

# dataset.batch(batch_size) - combines the consecutive elements of a dataset into batches
train_data = train_data.batch(BATCH_SIZE)
validation_data = validation_data.batch(num_validation_samples)
test_data = test_data.batch(num_test_samples)

# iter() creates an object which can be iterated one element at a time(in a loop)
# next() loads the next element of an iterable object
validation_inputs, validation_targets = next(iter(validation_data))


## Model

### Outline the model

In [4]:
input_size = 784
output_size = 10
# if we increase the hidden layer size, the accuracy will increase
hidden_layer_size = 50

# tf.keras.Sequential() function that is laying down the model
model = tf.keras.Sequential([
    
    # tf.keras.layers.Flatten(original shape) transforms a tensor into a vector
                            tf.keras.layers.Flatten(input_shape=(28,28,1)),
    # Dense(output size) takes the inputs, provided to the model and calculates 
    # the dot product of the inputs and the weights and adds the bias.
                            tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
                            tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
    # Include 10 hidden layers, then accuracy will increase
    
                            tf.keras.layers.Dense(output_size, activation='softmax')
                            ])

### choose the optimizer and the loss function

In [5]:
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy',# 2 other types of crossentropy
             metrics = ['accuracy'])

### Training

In [6]:
NUM_EPOCHS = 5
VALIDATION_STEPS = num_validation_samples

model.fit(train_data, epochs = NUM_EPOCHS, validation_data=(validation_inputs, 
        validation_targets), validation_steps = VALIDATION_STEPS, verbose = 2)

Epoch 1/5
540/540 - 70s - loss: 0.4339 - accuracy: 0.8772 - val_loss: 0.0000e+00 - val_accuracy: 0.0000e+00
Epoch 2/5
540/540 - 75s - loss: 0.1903 - accuracy: 0.9450 - val_loss: 0.1614 - val_accuracy: 0.9538
Epoch 3/5
540/540 - 76s - loss: 0.1414 - accuracy: 0.9586 - val_loss: 0.1389 - val_accuracy: 0.9582
Epoch 4/5
540/540 - 78s - loss: 0.1170 - accuracy: 0.9648 - val_loss: 0.1206 - val_accuracy: 0.9647
Epoch 5/5
540/540 - 80s - loss: 0.0982 - accuracy: 0.9696 - val_loss: 0.1019 - val_accuracy: 0.9710


<tensorflow.python.keras.callbacks.History at 0x637392810>

## To achieve 98.5%+ accuracy
<li> create 10 hidden layers
<li> hidden_layer_size = 5000
<li> batch_size = 150
<li> NUM_EPOCHS = 10

### Test the model

In [7]:
# model.evaluate()-returns the loss value and metrics values for the model in test mode
test_loss, test_accuracy = model.evaluate(test_data)



In [8]:
print('Test loss: {0:.2f}. Test accuracy: {1:.2f}%'.format(test_loss, test_accuracy*100.))

Test loss: 0.11. Test accuracy: 96.86%
