# Assignment 1: Experiment with handwritten text recognition using different optimization methods provided in Keras.

In [1]:
# Using tensorflow-gpu 2.3.1
import tensorflow as tf
from tensorflow import keras

import numpy as np

In [2]:
# Parameters setup

EPOCHS = 50 # The number of epochs is a hyperparameter of gradient descent that controls the number of complete passes through the training dataset.

BATCH_SIZE = 128 # The batch size is a hyperparameter that defines the number of samples to work through before updating the internal model parameters.

VERBOSE = 1 # Used for decide how much output we want when training the model.

NB_CLASSES = 10 # Number of outputs = number of digits (from 0 ~ 9)

N_HIDDEN = 128 # Positive integer, dimensionality of the output space.

VALIDATION_SPLIT = 0.2 # How much TRAIN is reserved for VALIDATION.

DROPOUT = 0.3 # The ratio of dropout variable.

In [3]:
# Loading MNIST dataset, verifing the split between train and test is 60,000, and 10,000 respectly.
# One-hot is automatically applied.
mnist = keras.datasets.mnist
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()

# X_train is 60000 rows of 28x28 values --> reshaped in 60000 x 784
RESHAPED = 784
X_train = X_train.reshape(60000, RESHAPED)
X_test = X_test.reshape(10000, RESHAPED)
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

# Applying normalization in [0,1]
X_train, X_test = X_train / 255.0, X_test / 255.0

print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# one-hot
Y_train = tf.keras.utils.to_categorical(Y_train, NB_CLASSES)
Y_test = tf.keras.utils.to_categorical(Y_test, NB_CLASSES)

60000 train samples
10000 test samples


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_layer (Dense)          (None, 128)               100480    
_________________________________________________________________
dropout (Dropout)            (None, 128)               0         
_________________________________________________________________
dense_layer_2 (Dense)        (None, 128)               16512     
_________________________________________________________________
dropout_1 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_layer_3 (Dense)        (None, 10)                1290      
Total params: 118,282
Trainable params: 118,282
Non-trainable params: 0
_________________________________________________________________


## Using optimizer SGD (stochastic gradient decent)

In [13]:
test_acc_results = []

# Train and evaluate the model 10 times and get the average test accuracy for comparison
for i in range(10):
	# Build the model

	# Sequential provides training and inference features on this model.
	model = tf.keras.models.Sequential()

	model.add(keras.layers.Dense(N_HIDDEN,
			input_shape=(RESHAPED,),
			name='dense_layer', activation='relu'))
	model.add(keras.layers.Dropout(DROPOUT))
	model.add(keras.layers.Dense(N_HIDDEN,
			name='dense_layer_2', activation='relu'))
	model.add(keras.layers.Dropout(DROPOUT))
	model.add(keras.layers.Dense(NB_CLASSES,
			name='dense_layer_3', activation='softmax'))

	# Summary of the model
	model.summary()

	# Compile the model
	model.compile(optimizer='SGD', 
              loss='categorical_crossentropy',
              metrics=['accuracy'])

	# Train the moodel
	model.fit(X_train, Y_train,
			batch_size=BATCH_SIZE, epochs=EPOCHS,
			verbose=VERBOSE, validation_split=VALIDATION_SPLIT)

	# Evalute the model
	test_loss, test_acc = model.evaluate(X_test, Y_test)
	print('Test accuracy:', test_acc)
	test_acc_results.append(test_acc)

299 - val_loss: 0.1616 - val_accuracy: 0.9534
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Test accuracy: 0.9628000259399414
Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_layer (Dense)          (None, 128)               100480    
_________________________________________________________________
dropout_18 (Dropout)         (None, 128)               0         
_________________________________________________________________
dense_layer_2 (Dense)        (None, 128)               16512     
_________________________________________________________________
dropout_19 (Dropout)         (None, 128)               0         
____________________________________________________

In [14]:
print("Average test accuracy using SGD is :", sum(test_acc_results) / len(test_acc_results))

Average test accuracy using SGD is : 0.9620800137519836
