In [1]:
import numpy as np
import tensorflow as tf

In [2]:
import tensorflow_datasets as tfds

Data

In [3]:
# Load MNIST through tensorflow_datasets.
# The call is unpacked into the dataset (indexed by split name further down)
# and an info object that is later queried for the number of examples per split.
mnist_dataset, mnist_info = tfds.load(
    name="mnist",
    as_supervised=True,
    with_info=True,
)

In [4]:
# Pull the train and test splits out of the loaded dataset.
mnist_train = mnist_dataset["train"]
mnist_test = mnist_dataset["test"]

# Reserve 10% of the training examples for validation. The product is a
# float, so it is cast to an int64 tensor before being used as a count.
num_validation_samples = tf.cast(
    0.1 * mnist_info.splits["train"].num_examples, tf.int64
)

# The test split is used in full; keep its size as an int64 tensor as well.
num_test_samples = tf.cast(mnist_info.splits["test"].num_examples, tf.int64)

def scale(image, label):
    """Rescale greyscale pixel values from the 0-255 range to 0-1.

    The image is cast to float32 and divided by 255; the label is passed
    through unchanged.

    Returns:
        A ``(scaled_image, label)`` tuple.
    """
    scaled_image = tf.cast(image, tf.float32) / 255.
    return scaled_image, label

# Apply the 0-255 -> 0-1 scaling to the train/validation data and the test data.
scaled_train_and_validation_data = mnist_train.map(scale)
test_data = mnist_test.map(scale)

# Shuffle through a bounded buffer instead of all samples at once, so the
# whole dataset does not have to be held in memory.
BUFFER_SIZE = 10000

# reshuffle_each_iteration=False is required for a clean train/validation
# split: the split is made afterwards with take()/skip() on this dataset.
# With the default (reshuffle on every iteration) each pass over the data
# produces a different order, so the examples skipped for training change
# from epoch to epoch and validation examples leak into the training set.
# If per-epoch reshuffling of the training data is wanted, apply a second
# shuffle() to the training split after it has been separated.
shuffled_train_and_validation_data = scaled_train_and_validation_data.shuffle(
    BUFFER_SIZE, reshuffle_each_iteration=False
)

# The test data is shuffled too, although it is only ever evaluated as a whole.
shuffled_test_data = test_data.shuffle(BUFFER_SIZE)

In [5]:
# Split the shuffled data: everything after the first num_validation_samples
# elements is used for training, the first num_validation_samples elements
# for validation.
# NOTE: take() and skip() only yield disjoint sets if the upstream shuffle
# produces the same order every time the dataset is iterated.
train_data = shuffled_train_and_validation_data.skip(num_validation_samples)
validation_data = shuffled_train_and_validation_data.take(num_validation_samples)

In [6]:
# Mini-batch gradient descent: the training data is consumed in batches of
# BATCH_SIZE samples.
BATCH_SIZE = 1000

train_data = train_data.batch(BATCH_SIZE)

# Validation and testing only run forward propagation (no backpropagation),
# so they need less compute and exact rather than batch-averaged values are
# wanted. The model still expects batched input, so each of these sets is
# wrapped into one single batch holding all of its samples.
test_data = shuffled_test_data.batch(num_test_samples)
validation_data = validation_data.batch(num_validation_samples)

# Materialise the single validation batch as an (inputs, targets) pair.
validation_batch = next(iter(validation_data))
validation_inputs, validation_targets = validation_batch


Model

Outline the model

In [7]:
# Layer sizes. input_size is not referenced by the layer definitions below;
# the Flatten layer is given the image shape directly (28 * 28 * 1 = 784).
input_size = 784
output_size = 10
hidden_layer_size = 1000

# Feed-forward classifier: flatten the image, pass it through two hidden
# layers of equal width (relu, then tanh) and finish with a softmax over
# the output_size classes.
model_layers = [
    tf.keras.layers.Flatten(input_shape=(28, 28, 1)),
    tf.keras.layers.Dense(hidden_layer_size, activation="relu"),
    tf.keras.layers.Dense(hidden_layer_size, activation="tanh"),
    tf.keras.layers.Dense(output_size, activation="softmax"),
]

model = tf.keras.Sequential(model_layers)

Choosing the optimizer and the loss function

In [8]:
# Classification problem, so the loss is a cross-entropy.
# The "sparse" variant is used so that the targets can stay plain class
# labels; no separate one-hot encoding step is needed.
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

Training

In [9]:
NUM_EPOCHS = 5

# Early stopping with patience=2: training is allowed two epochs without
# improvement of the monitored quantity before it is stopped
# (the monitored quantity is left at the Keras default -- presumably val_loss).
early_stopping = tf.keras.callbacks.EarlyStopping(patience=2)

# Arguments: the batched training data, the number of epochs, the callbacks
# and the validation data as an (inputs, targets) pair.
# validation_steps is the number of validation batches evaluated at the end
# of each epoch; the whole validation set is a single batch, hence 1.
model.fit(
    train_data,
    epochs=NUM_EPOCHS,
    callbacks=[early_stopping],
    validation_data=(validation_inputs, validation_targets),
    validation_steps=1,
    verbose=2,
)

Epoch 1/5
54/54 - 17s - loss: 0.3856 - accuracy: 0.8835 - val_loss: 0.0000e+00 - val_accuracy: 0.0000e+00
Epoch 2/5
54/54 - 12s - loss: 0.1221 - accuracy: 0.9633 - val_loss: 0.0868 - val_accuracy: 0.9727
Epoch 3/5
54/54 - 12s - loss: 0.0720 - accuracy: 0.9784 - val_loss: 0.0631 - val_accuracy: 0.9820
Epoch 4/5
54/54 - 12s - loss: 0.0488 - accuracy: 0.9864 - val_loss: 0.0458 - val_accuracy: 0.9865
Epoch 5/5
54/54 - 12s - loss: 0.0328 - accuracy: 0.9903 - val_loss: 0.0328 - val_accuracy: 0.9895


<tensorflow.python.keras.callbacks.History at 0x1bd943f12c8>

Test the model

In [46]:
# Evaluate the trained model on the single-batch test data. The result is
# unpacked as (loss, accuracy), matching the loss and the one metric passed
# to compile().
test_metrics = model.evaluate(test_data)
test_loss, test_accuracy = test_metrics



In [47]:
# Report the test loss and the test accuracy (as a percentage), both with
# two decimals.
report_template = "Test loss: {0:.2f}, Test accuracy: {1:.2f}%"
print(report_template.format(test_loss, test_accuracy*100))

Test loss: 0.06, Test accuracy: 97.84%
