# My Hello World deep neural network: MNIST Classification

## We will detect which digit is written in the MNIST dataset (handwritten digit recognition).
- We will use 70,000 images (28x28 pixels) of handwritten digits (1 digit per image) from tensorflow_datasets
- We will classify 'each' image into one of 10 categories  (0, 1, 2, 3, 4, 5, 6, 7, 8, 9).
- We will use a neural network with 2 hidden layers

## Importing packages and loading the data

In [1]:
import numpy as np
import tensorflow as tf

#There is a PyPI package containing data set (images, videos, ....) files
#for tensorflow, the package name is tensorflow_datasets
import tensorflow_datasets as tf_ds

In [2]:
raw_data = tf_ds.load(name='mnist', as_supervised=True, with_info=True )
raw_data

({'test': <DatasetV1Adapter shapes: ((28, 28, 1), ()), types: (tf.uint8, tf.int64)>,
  'train': <DatasetV1Adapter shapes: ((28, 28, 1), ()), types: (tf.uint8, tf.int64)>},
 tfds.core.DatasetInfo(
     name='mnist',
     version=3.0.0,
     description='The MNIST database of handwritten digits.',
     homepage='http://yann.lecun.com/exdb/mnist/',
     features=FeaturesDict({
         'image': Image(shape=(28, 28, 1), dtype=tf.uint8),
         'label': ClassLabel(shape=(), dtype=tf.int64, num_classes=10),
     }),
     total_num_examples=70000,
     splits={
         'test': 10000,
         'train': 60000,
     },
     supervised_keys=('image', 'label'),
     citation="""@article{lecun2010mnist,
       title={MNIST handwritten digit database},
       author={LeCun, Yann and Cortes, Corinna and Burges, CJ},
       journal={ATT Labs [Online]. Available: http://yann. lecun. com/exdb/mnist},
       volume={2},
       year={2010}
     }""",
     redistribution_info=,
 ))

In [3]:
type(raw_data)

tuple

In [7]:
len(raw_data)

2

In [4]:
raw_data[0]

{'test': <DatasetV1Adapter shapes: ((28, 28, 1), ()), types: (tf.uint8, tf.int64)>,
 'train': <DatasetV1Adapter shapes: ((28, 28, 1), ()), types: (tf.uint8, tf.int64)>}

In [5]:
raw_data[1]

tfds.core.DatasetInfo(
    name='mnist',
    version=3.0.0,
    description='The MNIST database of handwritten digits.',
    homepage='http://yann.lecun.com/exdb/mnist/',
    features=FeaturesDict({
        'image': Image(shape=(28, 28, 1), dtype=tf.uint8),
        'label': ClassLabel(shape=(), dtype=tf.int64, num_classes=10),
    }),
    total_num_examples=70000,
    splits={
        'test': 10000,
        'train': 60000,
    },
    supervised_keys=('image', 'label'),
    citation="""@article{lecun2010mnist,
      title={MNIST handwritten digit database},
      author={LeCun, Yann and Cortes, Corinna and Burges, CJ},
      journal={ATT Labs [Online]. Available: http://yann. lecun. com/exdb/mnist},
      volume={2},
      year={2010}
    }""",
    redistribution_info=,
)

### Comment
- The raw_data is a two elements tuple. 
- The first component contains the data itself and the second coponent contains info.
- We are going to extract the data and the info separately

## Data Preparation

In [8]:
mnist_dataset, mnist_info = raw_data

In [9]:
mnist_info

tfds.core.DatasetInfo(
    name='mnist',
    version=3.0.0,
    description='The MNIST database of handwritten digits.',
    homepage='http://yann.lecun.com/exdb/mnist/',
    features=FeaturesDict({
        'image': Image(shape=(28, 28, 1), dtype=tf.uint8),
        'label': ClassLabel(shape=(), dtype=tf.int64, num_classes=10),
    }),
    total_num_examples=70000,
    splits={
        'test': 10000,
        'train': 60000,
    },
    supervised_keys=('image', 'label'),
    citation="""@article{lecun2010mnist,
      title={MNIST handwritten digit database},
      author={LeCun, Yann and Cortes, Corinna and Burges, CJ},
      journal={ATT Labs [Online]. Available: http://yann. lecun. com/exdb/mnist},
      volume={2},
      year={2010}
    }""",
    redistribution_info=,
)

In [10]:
mnist_dataset

{'test': <DatasetV1Adapter shapes: ((28, 28, 1), ()), types: (tf.uint8, tf.int64)>,
 'train': <DatasetV1Adapter shapes: ((28, 28, 1), ()), types: (tf.uint8, tf.int64)>}

### Comment
- The mnist_dataset data set is a dictionary with 2 keys 'train' and 'test'
- we have to create our own validation data set

In [11]:
mnist_train, mnist_test = mnist_dataset['train'], mnist_dataset['test']

# we set the number of validation sample
# we use 10% validation and 90% train
num_validation_samples = 0.1 * mnist_info.splits['train'].num_examples 

# we have to convert it to integer
num_validation_samples = tf.cast(num_validation_samples, tf.int64)

num_test_samples = mnist_info.splits['test'].num_examples
num_test_samples = tf.cast(num_test_samples, tf.int64)

In [13]:
print(num_validation_samples,
      num_test_samples)

tf.Tensor(6000, shape=(), dtype=int64) tf.Tensor(10000, shape=(), dtype=int64)


### Scalling the input

In [12]:
#scaling an image matrix
def scale(image, label):
    '''scale the pixel and returns: pixel (float32 matrix), label'''
    image = tf.cast(image, dtype=tf.float32)
    image /= 255.
    
    return image, label

#scaling all images in a dataset using map
scaled_train_validation_data = mnist_train.map(scale)
scaled_test_data = mnist_test.map(scale)

### Schuffling the Data

In [14]:
#create a global constant BUFFER_SIZE
#the data will usually be too big to be schuffled at once
# so we shuffle it chunk by chunk and BUFFER_SIZE is the size of a chunk
BUFFER_SIZE = 10000

schuffled_train_validation = scaled_train_validation_data.shuffle(BUFFER_SIZE)

### Extract the train an validation data

In [15]:
validation_data = schuffled_train_validation.take(num_validation_samples)
train_data = schuffled_train_validation.skip(num_validation_samples)

In [16]:
#specifying the batch size

BATCH_SIZE = 100

train_data = train_data.batch(BATCH_SIZE)
validation_data = validation_data.batch(num_validation_samples) 
test_data = scaled_test_data.batch(num_test_samples)

In [17]:
validation_inputs, validation_targets = next(iter(validation_data))

In [18]:
validation_targets

<tf.Tensor: shape=(6000,), dtype=int64, numpy=array([1, 8, 0, ..., 5, 0, 6])>

# The Training part
<center><b>Data ---> Model ---> Objective function ---> Optimization algorithm</b></center>
<br>

#### <b>The Model</b>

In [23]:
#we have to stack multiple layers on top of each other in a Sequential instance object 

### Hyperparameters
#size of a single layer
layer_size = 150 #50
input_size = 784  # since we have 28 * 28 =784 pixels for one image
output_size = 10 #since we want to recognize 10 digits: 0 to 9

# first we have to flatten each 28x28 matrix into a vector of length 784
# we use 'relu', rectified linear unit,  as activation function for the two hidden layers
# we use 'softmax' activation function for the output layer since we are doing classification
model = tf.keras.Sequential([
                             tf.keras.layers.Flatten(input_shape=(28, 28, 1)),
                             tf.keras.layers.Dense(layer_size, activation='relu'),
                             tf.keras.layers.Dense(layer_size, activation='relu'),
                             tf.keras.layers.Dense(output_size, activation='softmax'),
                            ])

#### <b>The Objective function and the Optimization algorithm</b>

In [24]:
my_optimizer = tf.keras.optimizers.Adam()
my_loss = tf.keras.losses.SparseCategoricalCrossentropy()
#sparse categorical crossentropy applies one-hot encoding 
#i.e convert the 10 categories into 10 standard basis of R^10

# compiling the model
model.compile(optimizer=my_optimizer, loss=my_loss, metrics=['accuracy'])

## <b> Training </b>

#### <b>Fitting the model</b>

In [25]:
NUM_EPOCHS = 15
VALIDATION_STEPS = num_validation_samples / BATCH_SIZE
VALIDATION_STEPS = tf.cast(VALIDATION_STEPS, tf.int64)

my_callback = tf.keras.callbacks.EarlyStopping()

# argument callbacks must be a list, if not we get the erro
#TypeError: 'EarlyStopping' object is not iterable
# train_data contains both inputs and targets
model.fit(train_data,
          epochs=NUM_EPOCHS,
          validation_data=(validation_inputs, validation_targets),
          validation_steps=VALIDATION_STEPS,
          callbacks= [my_callback],
          verbose=2)
#ValueError: `batch_size` or `steps` is required for `Tensor` or `NumPy` input data.
#to avoid this error we need to specify the "validation steps": validation_steps

Epoch 1/15
540/540 - 9s - loss: 0.2957 - accuracy: 0.9156 - val_loss: 0.1488 - val_accuracy: 0.9573
Epoch 2/15
540/540 - 9s - loss: 0.1166 - accuracy: 0.9649 - val_loss: 0.0994 - val_accuracy: 0.9710
Epoch 3/15
540/540 - 9s - loss: 0.0797 - accuracy: 0.9756 - val_loss: 0.0788 - val_accuracy: 0.9767
Epoch 4/15
540/540 - 9s - loss: 0.0578 - accuracy: 0.9821 - val_loss: 0.0732 - val_accuracy: 0.9798
Epoch 5/15
540/540 - 9s - loss: 0.0470 - accuracy: 0.9854 - val_loss: 0.0534 - val_accuracy: 0.9845
Epoch 6/15
540/540 - 9s - loss: 0.0369 - accuracy: 0.9883 - val_loss: 0.0521 - val_accuracy: 0.9850
Epoch 7/15
540/540 - 9s - loss: 0.0312 - accuracy: 0.9902 - val_loss: 0.0438 - val_accuracy: 0.9868
Epoch 8/15
540/540 - 8s - loss: 0.0262 - accuracy: 0.9921 - val_loss: 0.0356 - val_accuracy: 0.9885
Epoch 9/15
540/540 - 9s - loss: 0.0199 - accuracy: 0.9939 - val_loss: 0.0338 - val_accuracy: 0.9900
Epoch 10/15
540/540 - 9s - loss: 0.0173 - accuracy: 0.9945 - val_loss: 0.0263 - val_accuracy: 0.9913

<tensorflow.python.keras.callbacks.History at 0x7fe48fbe7210>

## <b> Testing / Making Predictions

We test the model by using the method <b>.evaluate()</b>

In [26]:
test_loss, test_accuracy = model.evaluate(test_data)



In [27]:
test_loss

0.09012626856565475

In [28]:
test_accuracy

0.9779000282287598

## <b>Predicting in Deployment</b>

In [36]:
test_data

<DatasetV1Adapter shapes: ((None, 28, 28, 1), (None,)), types: (tf.float32, tf.int64)>

In [37]:
#test_result = model.predict_on_batch(test_data)
#test_result_np = test_result.numpy()
#test_result_np.round(1)
test_result = model.predict(test_data)

In [38]:
type(test_result)

numpy.ndarray

In [40]:
test_result

array([[1.15202026e-16, 3.31078602e-11, 1.00000000e+00, ...,
        9.43522604e-14, 2.22625304e-11, 5.89938099e-18],
       [1.00000000e+00, 1.50231786e-15, 9.18348569e-16, ...,
        1.52118281e-13, 2.35420299e-13, 1.73108530e-14],
       [3.34317022e-15, 8.11343614e-12, 4.27930295e-14, ...,
        7.49771889e-09, 2.49219048e-13, 1.13883993e-06],
       ...,
       [3.12734834e-08, 6.01175643e-09, 4.82653959e-05, ...,
        3.56560996e-11, 9.99951720e-01, 2.29180591e-10],
       [9.99994397e-01, 1.82687102e-10, 5.65142591e-06, ...,
        7.68762998e-10, 3.06850492e-13, 8.87734886e-10],
       [1.02824169e-13, 9.14863888e-13, 6.59161690e-13, ...,
        7.25800099e-17, 1.50805507e-10, 1.09250855e-11]], dtype=float32)

In [42]:
final_test_result = test_result.round(1)

In [43]:
final_test_result

array([[0., 0., 1., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 1., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)