In [None]:
# Mount Google Drive at /content/drive (Colab-only helper) so that files
# stored on Drive are reachable through the local filesystem.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
cd "/content/drive/MyDrive/Spring_2023_Project/COMS_572_Source_Code/MNIST"

In [None]:
## load package
import numpy as np
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
from keras.models import Sequential
from sklearn.model_selection import train_test_split

In [None]:
# List the GPU devices TensorFlow can see; an empty list means none is visible.
tf.config.list_physical_devices('GPU')

In [None]:
# Load the built-in MNIST dataset. The loader returns a (train, test) pair,
# each of which is an (images, labels) tuple.
mnist_train, mnist_test = keras.datasets.mnist.load_data()
x_train, y_train = mnist_train
x_test, y_test = mnist_test

In [None]:
## reshape and normalize data

def _prepare_images(images):
    """Return `images` shaped (N, 28, 28, 1) as float32 scaled by 1/255.

    The sample count is inferred (-1) instead of being hard-coded, and the
    1/255 scaling is applied only while the array still has an integer dtype.
    That makes the cell safe to re-run: data that has already been converted
    to float is reshaped but not divided a second time.
    """
    images = images.reshape(-1, 28, 28, 1)
    if np.issubdtype(images.dtype, np.integer):
        images = images.astype("float32") / 255
    return images

x_train = _prepare_images(x_train)
x_test = _prepare_images(x_test)
# Flatten the label arrays to 1-D, again inferring the length.
y_train = y_train.reshape(-1)
y_test = y_test.reshape(-1)


In [None]:
# Keep 80% of the training data for fitting and hold out the remaining 20% for
# validation. A fixed seed makes the same samples land in each subset on every
# run, so results are comparable across kernel restarts.
SPLIT_SEED = 42
x_split_train, x_split_validation, y_split_train, y_split_validation = train_test_split(
    x_train, y_train, train_size=0.8, random_state=SPLIT_SEED
)

In [None]:
# Preview one training sample. The trailing channel axis is dropped so imshow
# receives a plain 2-D array (a documented input shape), and the label is shown
# in the title so the figure is readable on its own.
sample_index = 0
plt.imshow(x_split_train[sample_index][..., 0])
plt.title(f"Label: {y_split_train[sample_index]}");

In [None]:
## define the model architecture
# Three convolutional stages (16, 32, 64 filters), each made of two 2x2 ReLU
# convolutions followed by batch normalisation and 2x2 max-pooling, then two
# 256-unit dense layers with batch normalisation and a 10-way softmax output.

model = Sequential([
    # stage 1
    keras.layers.Conv2D(16, (2, 2), activation='relu', input_shape=(28, 28, 1)),
    keras.layers.Conv2D(16, (2, 2), activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPooling2D((2, 2)),
    # stage 2
    keras.layers.Conv2D(32, (2, 2), activation='relu'),
    keras.layers.Conv2D(32, (2, 2), activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPooling2D((2, 2)),
    # stage 3
    keras.layers.Conv2D(64, (2, 2), activation='relu'),
    keras.layers.Conv2D(64, (2, 2), activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPooling2D((2, 2)),
    # classifier head
    keras.layers.Flatten(),
    keras.layers.Dense(256, activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(256, activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(10, activation='softmax'),
])

model.summary()

In [None]:
# Configure training: Adam starting at a learning rate of 0.001, sparse
# categorical cross-entropy as the loss, and sparse categorical accuracy as
# the tracked metric.
opt = tf.keras.optimizers.Adam(learning_rate=0.001)
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
accuracy_metric = tf.keras.metrics.SparseCategoricalAccuracy()
model.compile(optimizer=opt, loss=loss_fn, metrics=[accuracy_metric])

In [None]:
## Training the model
# Fit on the training split and validate on the held-out split. The
# ReduceLROnPlateau callback watches val_loss with factor=0.2 and patience=3.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    patience=3,
    factor=0.2,
)
validation_pair = (x_split_validation, y_split_validation)
with tf.device('/device:GPU:0'):
  history = model.fit(
      x_split_train,
      y_split_train,
      epochs=30,
      verbose=1,
      validation_data=validation_pair,
      callbacks=[reduce_lr],
  )

In [None]:
def summarize_diagnostics(history):
    """Plot training and validation loss/accuracy curves for one training run.

    Args:
        history: object whose ``history`` attribute is a dict containing the
            per-epoch series 'loss', 'val_loss',
            'sparse_categorical_accuracy' and
            'val_sparse_categorical_accuracy' (for example the value returned
            by ``model.fit``).

    Draws a new two-row matplotlib figure; returns None.
    """
    metrics = history.history
    plt.figure()
    # plot loss
    plt.subplot(211)
    plt.title('Cross Entropy Loss')
    plt.plot(metrics['loss'], color='blue', label='train')
    # The 'val_*' series come from the validation data, so label them as such.
    plt.plot(metrics['val_loss'], color='orange', label='validation')
    plt.ylabel('loss')
    # Without a legend the labels given to plot() are never displayed.
    plt.legend()
    # plot accuracy
    plt.subplot(212)
    plt.title('Classification Accuracy')
    plt.plot(metrics['sparse_categorical_accuracy'], color='blue', label='train')
    plt.plot(metrics['val_sparse_categorical_accuracy'], color='orange', label='validation')
    plt.xlabel('epoch')
    plt.ylabel('accuracy')
    plt.legend()
    # Keep the lower subplot's title from colliding with the upper x-axis.
    plt.tight_layout()


In [None]:
# Show which per-epoch series the first training run recorded.
history.history.keys()

In [None]:
# Plot the loss and accuracy curves of the first training run.
summarize_diagnostics(history)

In [None]:
# Build the final model as a structural copy of `model` instead of repeating
# the layer list by hand, so the two architectures cannot drift apart.
# clone_model creates new layers with newly created weights, so nothing
# learned by `model` is carried over into `final_model`.
final_model = keras.models.clone_model(model)

final_model.summary()

In [None]:
# Per-epoch learning rates logged during the first training run. tf.keras 2.x
# records them under 'lr'; newer Keras releases use 'learning_rate', so accept
# whichever key is present.
lr_key = 'lr' if 'lr' in history.history else 'learning_rate'
obtained_learning_rate = np.array(history.history[lr_key])

In [None]:
def scheduler(epoch, lr=None, schedule=None):
  """Return the learning rate to use for the given epoch.

  Args:
    epoch: zero-based epoch index.
    lr: current learning rate. Accepted so the function matches the
      ``schedule(epoch, lr)`` signature of LearningRateScheduler; unused.
    schedule: optional sequence of per-epoch learning rates. Defaults to the
      module-level ``obtained_learning_rate``.

  Returns:
    The rate stored for ``epoch`` as a Python float. Epochs beyond the end of
    the sequence reuse its last entry instead of raising IndexError.

  Raises:
    IndexError: if the sequence is empty.
  """
  rates = obtained_learning_rate if schedule is None else schedule
  last_index = len(rates) - 1
  # Clamp so that training longer than the recorded run keeps the final rate.
  return float(rates[min(epoch, last_index)])

In [None]:
# Wrap `scheduler` in a Keras callback so it sets the learning rate each epoch.
callback = tf.keras.callbacks.LearningRateScheduler(scheduler)

In [None]:
# Compile the final model with the same optimizer type, loss and metric as
# the first model.
opt = tf.keras.optimizers.Adam(learning_rate=0.001)
final_compile_kwargs = {
    "optimizer": opt,
    "loss": tf.keras.losses.SparseCategoricalCrossentropy(),
    "metrics": [tf.keras.metrics.SparseCategoricalAccuracy()],
}
final_model.compile(**final_compile_kwargs)

In [None]:
# Retrain on the full training set while replaying the recorded learning-rate
# schedule. Train for exactly as many epochs as there are recorded rates, so
# the whole schedule is used; verbose=1 shows the per-epoch progress bar.
# The test set is passed as validation_data here, so the val_* values in
# `final_history` are test-set metrics.
with tf.device('/device:GPU:0'):
  final_history = final_model.fit(
      x_train,
      y_train,
      validation_data=(x_test, y_test),
      epochs=len(obtained_learning_rate),
      verbose=1,
      callbacks=[callback],
  )

In [None]:
 # evaluate the model
 # Model.evaluate returns the loss followed by the compiled metrics, so
 # `result` holds [loss, sparse categorical accuracy] on the test set.
 result = final_model.evaluate(x_test, y_test, verbose=1)


In [None]:
# Test accuracy as a percentage (result[1] is the accuracy metric).
result[1]*100

In [None]:
# Save the trained final model to an .h5 file in the current working directory.
final_model.save('MNIST_self_build_model.h5')