In [None]:
#pip install comet_ml > /dev/null 2>&1
import comet_ml
# ENTER YOUR COMET API KEY HERE!!
# A later cell asserts that this value is non-empty before any experiment is created.
# NOTE(review): avoid saving/committing the notebook with a real key filled in;
# consider loading it from an environment variable instead.
COMET_API_KEY = ""

# Import TensorFlow 2.x. On macOS, the packages below can be installed first:
# pip install tensorflow-macos (optimized version)
# pip install tensorflow-metal (GPU acceleration)
import tensorflow as tf

# other packages
import matplotlib.pyplot as plt
import numpy as np

import cv2

In [None]:
# Query the visible GPUs once and reuse the result for both the report and the check.
gpu_devices = tf.config.list_physical_devices('GPU')
print("GPUs available:", gpu_devices)
# This notebook refuses to continue without a GPU. Fail early with an actionable message
# (on hosted runtimes: Runtime > Change Runtime Type > GPU).
assert len(gpu_devices) > 0, (
    "No GPU detected. Switch to a GPU runtime (Runtime > Change Runtime Type > GPU) "
    "or install GPU support for TensorFlow."
)
# Experiments cannot be created without a Comet API key.
assert COMET_API_KEY != "", "Please insert your Comet API Key"

In [None]:
# Create a Comet experiment function to track each of the training runs

def create_experiment(optimizer, learning_rate):
    """Start a new Comet experiment named "<optimizer>_<learning_rate>".

    The experiment previously created by this function (if any) is ended first, so
    that only one tracked run is open at a time.

    Args:
        optimizer: Optimizer label used as the first half of the run name.
        learning_rate: Learning rate used as the second half of the run name. Any
            value that can be formatted as text is accepted (str, float, ...).

    Returns:
        The newly created ``comet_ml.Experiment``.
    """
    # The last experiment is remembered at module level. Checking `locals()` here can
    # never find it, because `experiment` is not bound yet at the top of the function.
    global _active_experiment
    previous = globals().get("_active_experiment")
    # `alive` is assumed to be comet_ml's "still running" flag -- TODO confirm.
    # When the attribute is missing we fall back to calling end() anyway.
    if previous is not None and getattr(previous, "alive", True):
        previous.end()

    # initiate the comet experiment for tracking
    experiment = comet_ml.Experiment(
        api_key=COMET_API_KEY,
        project_name="MNIST_CNN",
    )

    # An f-string (instead of `+`) also accepts a numeric learning rate.
    experiment.set_name(f"{optimizer}_{learning_rate}")

    _active_experiment = experiment
    return experiment


In [None]:
# LOAD DATASET
# load_data() returns two (images, labels) pairs: the training split and the test split.
mnist = tf.keras.datasets.mnist.load_data()

(train_images, train_labels), (test_images, test_labels) = mnist

# Report every shape through the array's own `.shape` so all four lines print a plain
# tuple (wrapping the array in tf.shape() would print a tensor repr instead).
print("Training images shape: ", train_images.shape)
print("Training labels shape: {}".format(train_labels.shape))
print("Testing images shape: {}".format(test_images.shape))
print(f"Test labels shape: {test_labels.shape} ")

In [None]:
# PREPARE DATASET FOR CNN
# Adjust the shape of the images and normalize pixel values in the range [0,1]
print(f"{' Not normalized image values ':-^100}")
print(train_images[0])
# Mnist dataset contains images of size 28x28 and pixel values in the range [0,255]

# Only transform arrays that still have the raw 3-D (samples, height, width) layout.
# Without this guard, re-running the cell would append a second channel axis and divide
# the pixel values by 255 a second time.
if train_images.ndim == 3:
    train_images = (np.expand_dims(train_images, axis = -1)/255.0).astype(np.float32)
if test_images.ndim == 3:
    test_images = (np.expand_dims(test_images, axis = -1)/255.0).astype(np.float32)
# Label casts are safe to repeat, so they need no guard.
train_labels = train_labels.astype(np.int64)
test_labels = test_labels.astype(np.int64)

print(f"{' Printing train_images values ':-^100}")
print(train_images[0])

In [None]:
# Show a 6x6 grid of training digits drawn at random (without replacement)
print(format(' Plotting 36 random images ', '-^100'))

print(format(' Randomly selected images ', '-^100'))
random_indices = np.random.choice(train_images.shape[0], 36, replace=False)
print(f"Random indices: {random_indices}")

print(format(' Plotting the images ', '-^100'))

plt.figure(figsize=(10, 10))
# Subplot slots are 1-based, hence start=1.
for slot, sample_index in enumerate(random_indices, start=1):
    plt.subplot(6, 6, slot)
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    plt.imshow(train_images[sample_index], cmap=plt.cm.binary)
    plt.axis('off')
    # Each tile is captioned with its ground-truth label and dataset index.
    plt.title(f"Label: {train_labels[sample_index]} \n Index: {sample_index}")

plt.subplots_adjust(hspace=0.5)
plt.suptitle("MNIST Dataset - Randomly Selected Images", fontsize=16)

print(format(' Logging the images to Comet ', '-^100'))


In [None]:
# 0 DEFINE HYPERPARAMETERS + experiment creation (fully connected model)

# Settings read by the compile / fit / evaluate cells that follow.
params = dict(
    learning_rate=0.0001,
    batch_size=64,
    num_epochs=5,
    loss_function="sparse_categorical_crossentropy",
)

# The run name is built from the optimizer label and the learning rate as text.
comet_model_fcnn = create_experiment("Adam", str(params["learning_rate"]))


In [None]:
# 1 CREATE A FULLY CONNECTED MODEL

print(f"{' Creating Fully Connected Neural Network ':-^100}")

def build_fc_model():
    """Return an uncompiled fully connected classifier.

    Architecture: Flatten -> Dense(128, relu) -> Dense(10, softmax).
    No input shape is declared, so the returned model is not built yet.
    """
    fc_model = tf.keras.Sequential([
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(128, activation = 'relu'),
        tf.keras.layers.Dense(10, activation = 'softmax')
    ])
    return fc_model

fc_model = build_fc_model()
# Build the model against the per-sample shape of the training images before summarising
# it. summary() on an unbuilt model cannot report output shapes or parameter counts, and
# older tf.keras versions raise ValueError in that case.
fc_model.build(input_shape=(None, *train_images.shape[1:]))
fc_model.summary()

In [None]:
# 2 DEFINE LOSS, OPTIMIZER AND METRICS
print(format(' Defining Loss, Optimizer and Metrics using model.compile ', '-^100'))

# Adam with the configured learning rate; the loss name also comes from `params`.
# Accuracy is tracked as the only metric.
fc_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=params['learning_rate']),
    loss=params['loss_function'],
    metrics=['accuracy'],
)

In [None]:
# 3 TRAIN THE MODEL
print(format(' Training the model using model.fit ', '-^100'))

# Fit on the training split; batch size and epoch count come from `params`.
fc_model.fit(
    x=train_images,
    y=train_labels,
    batch_size=params['batch_size'],
    epochs=params['num_epochs'],
)
# Close the Comet run that was opened for this model.
comet_model_fcnn.end()

# It has been seen that: 
# SGD Optimizer converges faster with higher learning rates, however, learning rates greater than
# 0.1 can cause the model to diverge. Best model: learning rate = 0.05
# Adam optimizer converges faster than SGD and requires smaller learning rates (< 0.001). Best
# model: learning rate = 0.0005

# The best model of all is using the Adam optimizer with a learning rate of 0.0005, yielding an 
# accuracy of 0.9254.

# SGD optimizer is more sensitive to the learning rate than Adam optimizer.


In [None]:
# 4 EVALUATE THE MODEL ON TEST DATASET
print(format(' Evaluating the model using model.evaluate ', '-^100'))

# evaluate() yields the loss followed by the compiled metric (accuracy).
test_loss, test_accuracy = fc_model.evaluate(
    x=test_images,
    y=test_labels,
    batch_size=params['batch_size'],
)

print("Test accuracy: {:.4f}".format(test_accuracy))

# When a machine learning model performs worse on new data than on its training data, it is an
# example of overfitting: the accuracy on the test dataset is lower than on the training dataset.

# The max accuracy on the training dataset that we can achieve using a fully connected neural network
# is 0.92. How can we do better?

In [None]:
# 0 DEFINE HYPERPARAMETERS + experiment creation (convolutional model)

# Settings read by the CNN compile / fit / evaluate cells that follow.
params = dict(
    learning_rate=0.0005,
    batch_size=64,
    num_epochs=5,
    loss_function="sparse_categorical_crossentropy",
)

# The run name is built from the optimizer label and the learning rate as text.
comet_model_cnn = create_experiment("Adam", str(params["learning_rate"]))


In [None]:
# 1 DEFINE THE CNN MODEL
print(format(' Creating Convolutional Neural Network ', '-^100'))

# Assemble the network layer by layer: two conv + max-pool stages, then a dense head.
cnn_model = tf.keras.Sequential()
cnn_model.add(tf.keras.layers.Conv2D(24, kernel_size=(3, 3), activation='relu'))
cnn_model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))
cnn_model.add(tf.keras.layers.Conv2D(36, kernel_size=(3, 3), activation='relu'))
cnn_model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))
cnn_model.add(tf.keras.layers.Flatten())
cnn_model.add(tf.keras.layers.Dense(128, activation='relu'))
cnn_model.add(tf.keras.layers.Dense(10, activation='softmax'))

# Run one training image through the network so the model is built before summary().
cnn_model.predict(train_images[[0]])
cnn_model.summary()

In [None]:
# 2 DEFINE LOSS, OPTIMIZER AND METRICS
print(format(' Defining Loss, Optimizer and Metrics using model.compile ', '-^100'))

# Adam with the configured learning rate; the loss name also comes from `params`.
# Accuracy is tracked as the only metric.
cnn_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=params['learning_rate']),
    loss=params['loss_function'],
    metrics=['accuracy'],
)

In [None]:
# 3 TRAIN THE MODEL USING MODEL.FIT
print(format(' Training the model using model.fit ', '-^100'))

# Fit on the training split; batch size and epoch count come from `params`.
cnn_model.fit(
    x=train_images,
    y=train_labels,
    batch_size=params['batch_size'],
    epochs=params['num_epochs'],
)
# Close the Comet run that was opened for this model.
comet_model_cnn.end()

In [None]:
# 4 EVALUATE THE MODEL USING MODEL.EVALUATE
print(format(' Evaluating the model using model.evaluate ', '-^100'))

# evaluate() yields the loss followed by the compiled metric (accuracy).
test_loss, test_accuracy = cnn_model.evaluate(
    x=test_images,
    y=test_labels,
    batch_size=params['batch_size'],
)

print("Test accuracy: {:.4f}".format(test_accuracy))


# Best model for: 
# SGD Optimizer: learning rate = 0.01
# Adam Optimizer: learning rate = 0.0005

In [None]:
# 5 MAKE PREDICTION WITH CNN MODEL
print(format(' Making predictions using model.predict ', '-^100'))

# One row of class scores per test image (the final layer is a 10-unit softmax).
predictions = cnn_model.predict(x=test_images)


In [None]:
# Visualize the classification results on a 6x6 grid of random test digits
plt.figure(figsize=(10, 10))

random_indices = np.random.choice(test_images.shape[0], 36, replace=False)

# Subplot slots are 1-based, hence start=1.
for slot, sample_index in enumerate(random_indices, start=1):
    plt.subplot(6, 6, slot)
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    plt.imshow(test_images[sample_index], cmap=plt.cm.binary)
    plt.axis('off')
    # The predicted class is the index of the highest score for this sample.
    predicted_label = np.argmax(predictions[sample_index])
    actual_label = test_labels[sample_index]
    plt.title(f"Predicted label: {predicted_label} \n Actual label: {actual_label}", fontsize = 8)

plt.subplots_adjust(hspace=0.5)
plt.suptitle("MNIST Dataset - Randomly Selected Images", fontsize=16)

In [None]:
# Predict own data

image_path = "digit3.jpeg"
img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
# cv2.imread reports a missing or undecodable file by returning None instead of raising.
# Check here so the failure is not an opaque error inside cv2.resize.
if img is None:
    raise FileNotFoundError(f"Could not read image file: {image_path!r}")

# Apply the same preparation as the dataset: 28x28, values in [0, 1], float32, channel axis.
img = cv2.resize(img, (28, 28))
# Invert intensities. Presumably the photo is dark ink on a light background, the
# opposite of the training digits -- TODO confirm against the actual image.
img = cv2.bitwise_not(img)
img = (img/255.0).astype(np.float32)
img = np.expand_dims(img, axis = -1)

# The model expects a batch, so wrap the single image in a length-1 array.
prediction = cnn_model.predict(np.array([img]))

plt.figure(figsize=(5, 5))
plt.imshow(img, cmap = plt.cm.binary)
plt.axis('off')
plt.title(f"Predicted label: {np.argmax(prediction)} \n Actual label: 3", fontsize = 8)