In [1]:
import tensorflow as tf 
from tensorflow import keras
import numpy as np

In [2]:
# Network and training
EPOCHS = 200
BATCH_SIZE = 128
VERBOSE = 1
NB_CLASSES = 10 # number of outputs = number of digits
N_HIDDEN = 128
VALIDATION_SPLIT = 0.2 # how TRAIN is reserved for VALIDATION
DROPOUT = 0.3

In [3]:
# Loading MNIST dataset 
# Labels have one-hot representation
mnist = keras.datasets.mnist
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()

In [4]:
# X_train is 60000 rows of 28*28 values; we reshape it to 60000 * 784
RESHAPED = 784

#
X_train = X_train.reshape(60000, RESHAPED)
X_test = X_test.reshape(10000, RESHAPED)
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

In [5]:
# Normalize inputs to be within in [0, 1]
X_train, X_test = X_train / 255.0, X_test / 255.0
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

60000 train samples
10000 test samples


In [6]:
# Labels have one-hot representation
Y_train = tf.keras.utils.to_categorical(Y_train, NB_CLASSES)
Y_test = tf.keras.utils.to_categorical(Y_test, NB_CLASSES)

In [7]:
# Build the model
model = tf.keras.models.Sequential()

model.add(keras.layers.Dense(N_HIDDEN,
            input_shape =(RESHAPED,),
            name = 'dense_layer', activation = 'relu'))
model.add(keras.layers.Dropout(DROPOUT))
model.add(keras.layers.Dense(N_HIDDEN,
            name = 'dense_layer_2', activation = 'relu'))
model.add(keras.layers.Dropout(DROPOUT))
model.add(keras.layers.Dense(NB_CLASSES,
            name = 'dense_layer_3', activation = 'softmax'))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [8]:
# Summary of the model
model.summary()

In [9]:
# Compiling the model
model.compile(optimizer='SGD',
            loss = 'categorical_crossentropy',
            metrics = ['accuracy'])

In [10]:
# Training the model
model.fit(X_train, Y_train,
        batch_size = BATCH_SIZE, epochs = EPOCHS,
        verbose = VERBOSE, validation_split = VALIDATION_SPLIT)

Epoch 1/200
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.3111 - loss: 2.0287 - val_accuracy: 0.8263 - val_loss: 0.8770
Epoch 2/200
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7006 - loss: 0.9890 - val_accuracy: 0.8725 - val_loss: 0.5210
Epoch 3/200
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7798 - loss: 0.7145 - val_accuracy: 0.8878 - val_loss: 0.4229
Epoch 4/200
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8154 - loss: 0.6071 - val_accuracy: 0.8972 - val_loss: 0.3718
Epoch 5/200
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8389 - loss: 0.5418 - val_accuracy: 0.9043 - val_loss: 0.3399
Epoch 6/200
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8504 - loss: 0.4930 - val_accuracy: 0.9110 - val_loss: 0.3171
Epoch 7/200
[1m375/37

<keras.src.callbacks.history.History at 0x239f7923f40>

In [11]:
# Evaluating the model
test_loss, test_acc = model.evaluate(X_test, Y_test)
print('Test accuracy:', test_acc)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9727 - loss: 0.0901
Test accuracy: 0.9768000245094299


In [12]:
# Compiling the model 
## optimizer = RMSProp
model.compile(optimizer='RMSProp',
            loss = 'categorical_crossentropy',
            metrics = ['accuracy'])

In [13]:
# Training the model
model.fit(X_train, Y_train,
        batch_size = BATCH_SIZE, epochs = EPOCHS,
        verbose = VERBOSE, validation_split = VALIDATION_SPLIT)

Epoch 1/200
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9660 - loss: 0.1093 - val_accuracy: 0.9753 - val_loss: 0.0889
Epoch 2/200
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9671 - loss: 0.1038 - val_accuracy: 0.9755 - val_loss: 0.0951
Epoch 3/200
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9720 - loss: 0.0915 - val_accuracy: 0.9755 - val_loss: 0.0943
Epoch 4/200
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9715 - loss: 0.0937 - val_accuracy: 0.9778 - val_loss: 0.0835
Epoch 5/200
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9729 - loss: 0.0877 - val_accuracy: 0.9774 - val_loss: 0.0877
Epoch 6/200
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9750 - loss: 0.0794 - val_accuracy: 0.9769 - val_loss: 0.0894
Epoch 7/200
[1m375/37

<keras.src.callbacks.history.History at 0x239fa04bdc0>

In [14]:
# Compiling the model 
## optimizer = adam
model.compile(optimizer='Adam',
            loss = 'categorical_crossentropy',
            metrics = ['accuracy'])

In [15]:
# Training the model
model.fit(X_train, Y_train,
        batch_size = BATCH_SIZE, epochs = 50,
        verbose = VERBOSE, validation_split = VALIDATION_SPLIT)

Epoch 1/50
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - accuracy: 0.9904 - loss: 0.0404 - val_accuracy: 0.9785 - val_loss: 0.1767
Epoch 2/50
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9891 - loss: 0.0413 - val_accuracy: 0.9788 - val_loss: 0.1775
Epoch 3/50
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9901 - loss: 0.0335 - val_accuracy: 0.9777 - val_loss: 0.1785
Epoch 4/50
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9898 - loss: 0.0376 - val_accuracy: 0.9796 - val_loss: 0.1576
Epoch 5/50
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9901 - loss: 0.0374 - val_accuracy: 0.9799 - val_loss: 0.1569
Epoch 6/50
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9895 - loss: 0.0383 - val_accuracy: 0.9806 - val_loss: 0.1545
Epoch 7/50
[1m375/375[0m 

<keras.src.callbacks.history.History at 0x239fa081870>

In [16]:
# from tf.keras.regularizers import l2, activity_l2
# model.add(Dense(64, input_dim = 64, w_regularizer=l2(0.01),
#                 activity_regularizer=activity_l2(0.01)))