# Import libraries

In [1]:
import tensorflow as tf
import tensorflow_datasets as tfds

### Defining some constants/hyperparameters

In [2]:
NUM_EPOCHS = 20        # upper bound on the number of training epochs
BATCH_SIZE = 128       # number of samples per training batch
BUFFER_SIZE = 70_000   # size of the buffer used when shuffling the data

### Download the MNIST dataset

In [3]:
# Load MNIST as supervised (image, label) pairs and also return the dataset metadata
mnist_dataset, mnist_info = tfds.load(
    name='mnist',
    as_supervised=True,
    with_info=True,
)

In [4]:
# Pull the 'train' and 'test' splits out of the loaded dataset
mnist_train = mnist_dataset['train']
mnist_test = mnist_dataset['test']

# Preprocess the data

### Scaling the dataset

In [5]:
def scale(image, label):
    """Cast `image` to float32 and divide it by 255; `label` is returned unchanged."""
    scaled_image = tf.cast(image, tf.float32) / 255
    return scaled_image, label

In [6]:
# Apply the scaling function to every element of both splits
test_data = mnist_test.map(scale)
train_and_validation_data = mnist_train.map(scale)

### Defining the set sizes

In [7]:
# Validation set size: 10% of the training split, stored as an int64 tensor
num_validation_samples = tf.cast(
    0.1 * mnist_info.splits['train'].num_examples,
    tf.int64,
)

In [8]:
# Test set size: number of examples in the test split, stored as an int64 tensor
num_test_samples = tf.cast(mnist_info.splits['test'].num_examples, tf.int64)

### Shuffle the data

In [9]:
# Shuffle once and keep that order fixed across iterations.
# With the default reshuffle_each_iteration=True the dataset is re-shuffled
# every time it is iterated (i.e. every epoch), so the skip()/take() split that
# follows would select different examples each epoch and validation samples
# would leak into the training set.
# Trade-off: the order of the examples is then also the same in every epoch.
train_and_validation_data = train_and_validation_data.shuffle(
    BUFFER_SIZE, reshuffle_each_iteration=False
)

### Splitting the train and validation datasets

In [10]:
# Validation data: the first `num_validation_samples` elements
validation_data = train_and_validation_data.take(num_validation_samples)

# Train data: everything after those elements
train_data = train_and_validation_data.skip(num_validation_samples)

### Batching the data
#### NOTE: The validation and test sets are only forward-propagated (no weight updates), so we place each of them in one big batch containing all of its samples

In [12]:
# Validation data: a single batch holding all validation samples
validation_data = validation_data.batch(num_validation_samples)

# Test data: a single batch holding all test samples
test_data = test_data.batch(num_test_samples)

# Train data: mini-batches of BATCH_SIZE samples
train_data = train_data.batch(BATCH_SIZE)

# Creating the model and training it

### Outlining the model / Architecture of our CNN

In [13]:
# Architecture: CONV -> MAXPOOL -> CONV -> MAXPOOL -> FLATTEN -> DENSE
model = tf.keras.Sequential([
    # First convolution: 50 filters of size 5x5 applied to 28x28x1 inputs
    tf.keras.layers.Conv2D(
        filters=50,
        kernel_size=5,
        activation='relu',
        input_shape=(28, 28, 1),
    ),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    # Second convolution: 50 filters of size 3x3
    tf.keras.layers.Conv2D(filters=50, kernel_size=3, activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Flatten(),
    # Output layer: 10 units with no activation, i.e. raw logits
    tf.keras.layers.Dense(units=10),
])

In [17]:
# Print an overview of the layers and their parameter counts (75-character-wide lines)
model.summary(line_length=75)

Model: "sequential"
___________________________________________________________________________
 Layer (type)                    Output Shape                  Param #     
 conv2d (Conv2D)                 (None, 24, 24, 50)            1300        
                                                                           
 max_pooling2d (MaxPooling2D)    (None, 12, 12, 50)            0           
                                                                           
 conv2d_1 (Conv2D)               (None, 10, 10, 50)            22550       
                                                                           
 max_pooling2d_1 (MaxPooling2D)  (None, 5, 5, 50)              0           
                                                                           
 flatten (Flatten)               (None, 1250)                  0           
                                                                           
 dense (Dense)                   (None, 10)                    12510

### Defining the loss function

In [18]:
# The model has no softmax layer and therefore outputs raw logits.
# from_logits=True tells TensorFlow to apply the softmax inside the loss
# calculation instead, which compensates for the missing softmax.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True,
)

### Compile the model

In [19]:
# Combine the model with the loss function and the optimizer, and prepare the network for training
model.compile(
    loss=loss_fn,
    optimizer='adam',
    metrics=['accuracy'],
)

### Defining an early-stopping mechanism

In [20]:
# Stop training when the validation loss has not improved for 2 epochs in a row,
# and restore the weights from the best epoch
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=2,
    min_delta=0,
    mode='auto',
    restore_best_weights=True,
    verbose=0,
)

### Train the network

In [21]:
# Train for at most NUM_EPOCHS epochs, evaluating on the validation set after
# each one; the early-stopping callback may end training sooner
model.fit(
    x=train_data,
    validation_data=validation_data,
    epochs=NUM_EPOCHS,
    callbacks=[early_stopping],
    verbose=2,
)

Epoch 1/20
422/422 - 16s - loss: 0.2719 - accuracy: 0.9226 - val_loss: 0.0779 - val_accuracy: 0.9762 - 16s/epoch - 37ms/step
Epoch 2/20
422/422 - 17s - loss: 0.0697 - accuracy: 0.9787 - val_loss: 0.0577 - val_accuracy: 0.9815 - 17s/epoch - 41ms/step
Epoch 3/20
422/422 - 16s - loss: 0.0525 - accuracy: 0.9841 - val_loss: 0.0435 - val_accuracy: 0.9875 - 16s/epoch - 37ms/step
Epoch 4/20
422/422 - 21s - loss: 0.0442 - accuracy: 0.9863 - val_loss: 0.0396 - val_accuracy: 0.9872 - 21s/epoch - 50ms/step
Epoch 5/20
422/422 - 21s - loss: 0.0364 - accuracy: 0.9891 - val_loss: 0.0302 - val_accuracy: 0.9908 - 21s/epoch - 49ms/step
Epoch 6/20
422/422 - 15s - loss: 0.0319 - accuracy: 0.9906 - val_loss: 0.0269 - val_accuracy: 0.9918 - 15s/epoch - 35ms/step
Epoch 7/20
422/422 - 15s - loss: 0.0287 - accuracy: 0.9911 - val_loss: 0.0158 - val_accuracy: 0.9953 - 15s/epoch - 36ms/step
Epoch 8/20
422/422 - 15s - loss: 0.0254 - accuracy: 0.9926 - val_loss: 0.0197 - val_accuracy: 0.9943 - 15s/epoch - 35ms/step


<keras.callbacks.History at 0x1fa821f79d0>

# Testing our model

In [22]:
# Evaluate the trained model on the test set: returns the loss and the accuracy metric
test_loss, test_accuracy = model.evaluate(x=test_data)



In [25]:
# Print the test loss (4 decimals) and the test accuracy as a percentage (2 decimals).
# The message previously misspelled "accuracy" as "acuraccy".
print('Test loss: {0: .4f}. Test accuracy: {1: .2f}%'.format(test_loss, test_accuracy * 100.))

Test loss:  0.0364. Test acuraccy:  98.87%
