In [1]:
# Adapted from LDL Chapter 5

In [2]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.utils import to_categorical
import numpy as np
import logging

In [3]:
# Fix TensorFlow's global random seed so random ops draw from a known seed,
# and restrict TensorFlow's logger to messages of level ERROR and above.
tf.random.set_seed(7)
tf.get_logger().setLevel(logging.ERROR)

In [4]:
# Training configuration.
# Number of full passes over the training set.
EPOCHS = 20
# Mini-batch size. Updating after every single example (batch size 1) was
# too slow, and the fit() calls below train with 25 examples per update, so
# the constant is kept in sync with what is actually run.
BATCH_SIZE = 25

In [5]:
# Fetch the MNIST training and test splits (images plus labels).
mnist = keras.datasets.mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Standardize the pixel values. Mean and standard deviation are computed on
# the training images only and then applied to both splits, so the test set
# is scaled with exactly the same statistics as the training set.
mean = train_images.mean()
stddev = train_images.std()

train_images, test_images = (
    (images - mean) / stddev for images in (train_images, test_images)
)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [6]:
# One-hot encode the labels (10 classes, one per digit).
# The cell overwrites its own inputs, so it is guarded to be safe to re-run:
# calling to_categorical on labels that are already one-hot would append
# another axis and the arrays would no longer match the 10-unit output layer.
# Assumes the raw labels are 1-D integer arrays, as returned by load_data().
if train_labels.ndim == 1:
    train_labels = to_categorical(train_labels, num_classes=10)
if test_labels.ndim == 1:
    test_labels = to_categorical(test_labels, num_classes=10)

In [7]:
# Weight initializer: draws values uniformly between -0.1 and 0.1
initializer = keras.initializers.RandomUniform(
    minval=-0.1,
    maxval=0.1,
)

In [8]:
# Sequential model
#   - Flatten: 28x28 input image -> 784 inputs
#   - Dense, 25 neurons, tanh activation (hidden layer)
#   - Dense, 10 neurons, logistic (sigmoid) activation (output layer)
#
# Each Dense layer receives its own copy of `initializer` (rebuilt from its
# config) instead of sharing the single instance: recent Keras versions warn
# that an unseeded initializer instance called more than once may return
# identical values on every call, which would correlate the two weight
# matrices. Biases start at zero.

model = keras.Sequential([
    keras.layers.Flatten(input_shape=(28, 28)),
    keras.layers.Dense(
        25,
        activation='tanh',
        kernel_initializer=initializer.from_config(initializer.get_config()),
        bias_initializer='zeros',
    ),
    keras.layers.Dense(
        10,
        activation='sigmoid',
        kernel_initializer=initializer.from_config(initializer.get_config()),
        bias_initializer='zeros',
    ),
])

In [13]:
# Optimizer: plain stochastic gradient descent with a learning rate of 0.01.
opt = keras.optimizers.SGD(learning_rate=0.01)

# Loss: mean squared error; accuracy is reported alongside it during training.
model.compile(optimizer=opt, loss='mean_squared_error', metrics=['accuracy'])

# Train for EPOCHS epochs on shuffled mini-batches of 25 examples, evaluating
# on the test split after every epoch. Updating after each single example
# (batch size 1) was taking too long, so the batch size was increased to 25.
# NOTE: fit() continues from the model's current weights, so re-running this
# cell trains the same model further rather than starting from scratch.
history = model.fit(
    x=train_images,
    y=train_labels,
    batch_size=25,
    epochs=EPOCHS,
    shuffle=True,
    verbose=2,
    validation_data=(test_images, test_labels),
)


Epoch 1/20
2400/2400 - 17s - loss: 0.0169 - accuracy: 0.9142 - val_loss: 0.0165 - val_accuracy: 0.9179 - 17s/epoch - 7ms/step
Epoch 2/20
2400/2400 - 24s - loss: 0.0167 - accuracy: 0.9149 - val_loss: 0.0163 - val_accuracy: 0.9179 - 24s/epoch - 10ms/step
Epoch 3/20
2400/2400 - 24s - loss: 0.0166 - accuracy: 0.9158 - val_loss: 0.0162 - val_accuracy: 0.9182 - 24s/epoch - 10ms/step
Epoch 4/20
2400/2400 - 24s - loss: 0.0164 - accuracy: 0.9162 - val_loss: 0.0160 - val_accuracy: 0.9188 - 24s/epoch - 10ms/step
Epoch 5/20
2400/2400 - 24s - loss: 0.0163 - accuracy: 0.9168 - val_loss: 0.0159 - val_accuracy: 0.9196 - 24s/epoch - 10ms/step
Epoch 6/20
2400/2400 - 24s - loss: 0.0162 - accuracy: 0.9175 - val_loss: 0.0158 - val_accuracy: 0.9200 - 24s/epoch - 10ms/step
Epoch 7/20
2400/2400 - 24s - loss: 0.0161 - accuracy: 0.9175 - val_loss: 0.0157 - val_accuracy: 0.9208 - 24s/epoch - 10ms/step
Epoch 8/20
2400/2400 - 24s - loss: 0.0159 - accuracy: 0.9180 - val_loss: 0.0155 - val_accuracy: 0.9208 - 24s/epo

In [14]:
# Second network: same layer sizes as the first model, but with ReLU in the
# hidden layer (He-normal weights) and softmax at the output
# (Glorot-uniform weights). Biases start at zero in both layers.
model2 = keras.Sequential()
model2.add(keras.layers.Flatten(input_shape=(28, 28)))
model2.add(
    keras.layers.Dense(
        25,
        activation='relu',
        kernel_initializer='he_normal',
        bias_initializer='zeros',
    )
)
model2.add(
    keras.layers.Dense(
        10,
        activation='softmax',
        kernel_initializer='glorot_uniform',
        bias_initializer='zeros',
    )
)

In [15]:
# Adam optimizer with categorical cross-entropy loss; report accuracy too.
model2.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [16]:
# Same training setup as the first model: EPOCHS epochs on shuffled
# mini-batches of 25 examples, evaluated on the test split after each epoch.
history2 = model2.fit(
    x=train_images,
    y=train_labels,
    batch_size=25,
    epochs=EPOCHS,
    shuffle=True,
    verbose=2,
    validation_data=(test_images, test_labels),
)

Epoch 1/20
2400/2400 - 5s - loss: 0.3090 - accuracy: 0.9087 - val_loss: 0.1982 - val_accuracy: 0.9409 - 5s/epoch - 2ms/step
Epoch 2/20
2400/2400 - 5s - loss: 0.1852 - accuracy: 0.9459 - val_loss: 0.1647 - val_accuracy: 0.9527 - 5s/epoch - 2ms/step
Epoch 3/20
2400/2400 - 5s - loss: 0.1555 - accuracy: 0.9539 - val_loss: 0.1605 - val_accuracy: 0.9547 - 5s/epoch - 2ms/step
Epoch 4/20
2400/2400 - 5s - loss: 0.1361 - accuracy: 0.9589 - val_loss: 0.1429 - val_accuracy: 0.9586 - 5s/epoch - 2ms/step
Epoch 5/20
2400/2400 - 5s - loss: 0.1254 - accuracy: 0.9624 - val_loss: 0.1309 - val_accuracy: 0.9627 - 5s/epoch - 2ms/step
Epoch 6/20
2400/2400 - 5s - loss: 0.1143 - accuracy: 0.9652 - val_loss: 0.1364 - val_accuracy: 0.9609 - 5s/epoch - 2ms/step
Epoch 7/20
2400/2400 - 5s - loss: 0.1064 - accuracy: 0.9681 - val_loss: 0.1489 - val_accuracy: 0.9580 - 5s/epoch - 2ms/step
Epoch 8/20
2400/2400 - 5s - loss: 0.1008 - accuracy: 0.9691 - val_loss: 0.1445 - val_accuracy: 0.9597 - 5s/epoch - 2ms/step
Epoch 9/