In [1]:
#import libraries

import numpy as np
import tensorflow as tf

In [2]:
# Show which TensorFlow version this notebook is running against.
tf.__version__

'2.3.0'

In [13]:
# Load MNIST and wrap each split in a tf.data.Dataset of (image, label) pairs.

from tensorflow.keras.datasets import mnist

train_arrays, test_arrays = mnist.load_data()
X_train, Y_train = train_arrays
X_test, Y_test = test_arrays

raw_train = tf.data.Dataset.from_tensor_slices(train_arrays)
raw_test = tf.data.Dataset.from_tensor_slices(test_arrays)

In [93]:
#scale, shuffle, and batch data

# Fraction of the training examples that is held out for validation.
VALIDATION_FRACTION = 0.1

# Both counts are stored as int64 tensors because they are later passed to
# Dataset.take / Dataset.skip / Dataset.batch.
num_validation_samples = tf.cast(VALIDATION_FRACTION * len(raw_train), tf.int64)
num_test_samples = tf.cast(len(raw_test), tf.int64)

def scale(image, label):
    """Cast `image` to float32 and divide it by 255; return `label` untouched.

    Intended for use with `Dataset.map`, which passes each (image, label)
    element as two positional arguments.
    """
    return tf.cast(image, tf.float32) / 255., label

# Apply the pixel scaling to every (image, label) element of both splits.
scaled_train_and_validation_data = raw_train.map(scale)
scaled_test_data = raw_test.map(scale)

BUFFER_SIZE = 10000

# Shuffle ONCE, with a fixed order, before splitting.
# Dataset.shuffle reshuffles on every iteration by default, so take() and
# skip() below would each see a different order every time the dataset is
# iterated: the validation batch would be drawn from one order while each
# training epoch skipped the first elements of another, letting validation
# examples leak into training. reshuffle_each_iteration=False keeps the two
# splits disjoint.
shuffled_train_and_validation_data = scaled_train_and_validation_data.shuffle(
    BUFFER_SIZE, reshuffle_each_iteration=False
)

validation_data = shuffled_train_and_validation_data.take(num_validation_samples)
train_data = shuffled_train_and_validation_data.skip(num_validation_samples)

BATCH_SIZE = 100

# Reshuffle only the training split each epoch (safe now that the split is
# fixed), then form mini-batches.
train_data = train_data.shuffle(BUFFER_SIZE).batch(BATCH_SIZE)

# Validation and test data are each served as one single batch.
validation_data = validation_data.batch(num_validation_samples)
test_data = scaled_test_data.batch(num_test_samples)

# Materialise the single validation batch as tensors for model.fit.
validation_inputs, validation_targets = next(iter(validation_data))

In [109]:
input_size = 784          # 28 * 28 pixels per image once flattened
output_size = 10          # one output unit per digit class
hidden_layer_size = 225   # width of each hidden layer

model = tf.keras.Sequential([
    # The images produced by mnist.load_data() are 28x28 with no channel axis,
    # so declare exactly that shape instead of (28, 28, 1).
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
    tf.keras.layers.Dense(hidden_layer_size, activation='tanh'),
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
    # Softmax output pairs with the sparse categorical cross-entropy loss.
    tf.keras.layers.Dense(output_size, activation='softmax')
])

In [110]:
# Choose the optimizer and loss function.
# To tune the learning rate, pass an optimizer instance such as
# tf.keras.optimizers.Adam(learning_rate=0.0001) instead of the 'adam' string.

model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [111]:
# Train the network; validation metrics are reported after every epoch.

NUM_EPOCHS = 5

held_out_batch = (validation_inputs, validation_targets)
model.fit(
    train_data,
    epochs=NUM_EPOCHS,
    validation_data=held_out_batch,
    verbose=2,
)

Epoch 1/5
540/540 - 3s - loss: 0.2447 - accuracy: 0.9263 - val_loss: 0.1159 - val_accuracy: 0.9657
Epoch 2/5
540/540 - 3s - loss: 0.0945 - accuracy: 0.9712 - val_loss: 0.0779 - val_accuracy: 0.9743
Epoch 3/5
540/540 - 3s - loss: 0.0656 - accuracy: 0.9785 - val_loss: 0.0697 - val_accuracy: 0.9775
Epoch 4/5
540/540 - 3s - loss: 0.0479 - accuracy: 0.9849 - val_loss: 0.0541 - val_accuracy: 0.9832
Epoch 5/5
540/540 - 3s - loss: 0.0422 - accuracy: 0.9864 - val_loss: 0.0382 - val_accuracy: 0.9875


<tensorflow.python.keras.callbacks.History at 0x291ad95c100>

In [112]:
# Evaluate on the test set. With loss plus one metric compiled in,
# evaluate() returns [loss, accuracy].

test_metrics = model.evaluate(test_data)
test_loss, test_accuracy = test_metrics



Takeaways from training and testing MNIST data:

The width of the model seemed to have the greatest influence on accuracy. Changing the size of the hidden layers improved accuracy by whole percentage points.
Depth seemed to matter less, but the choice of activation functions made a noticeable difference: sigmoid tended to yield worse results, while combinations of relu and tanh performed better.
Changes in batch size most notably affected training time: the smaller the batch size, the longer training took.
Apart from an anomaly where a change in learning rate gave 99% accuracy, testing showed that changing the learning rate from its default did not improve results.