In [47]:
#Import relevant libraries
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
# import datetime
# from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from tensorboard.plugins.hparams import api as hp

In [48]:
#Define the hyperparameters
# Size of the shuffle buffer passed to Dataset.shuffle() further down
BUFFER_SIZE = 70_000
# Number of examples per mini-batch when batching the training data
BATCH_SIZE = 128
# Maximum number of epochs passed to model.fit(); the EarlyStopping callback may end training sooner
NUM_EPOCHS = 20

In [49]:
#Download MNIST as (image, label) pairs, together with the dataset metadata
mnist_dataset, mnist_info = tfds.load('mnist', as_supervised=True, with_info=True)

# Pull the two predefined splits out of the returned dictionary
mnist_train = mnist_dataset['train']
mnist_test = mnist_dataset['test']


#Scale the dataset. First cast the image to float32 to avoid a value error
def scale(image, label):
    """Rescale an image's pixel values to the [0, 1] range.

    Args:
        image: Image tensor with integer pixel values (0-255 for MNIST).
        label: Target associated with the image; passed through unchanged.

    Returns:
        A ``(image, label)`` tuple where ``image`` is float32 in [0, 1].
    """
    image = tf.cast(image, tf.float32)
    # 8-bit pixels top out at 255, so dividing by 255 maps them onto [0, 1]
    image /= 255.

    return image, label
    

In [50]:
train_and_validation_data = mnist_train.map(scale)
test_data = mnist_test.map(scale)

# Hold out 10% of the training split for validation.
# The count is cast to int64 because skip/take/batch need an integer size.
num_validation_samples = 0.1*mnist_info.splits['train'].num_examples
num_validation_samples = tf.cast(num_validation_samples, tf.int64)

# Number of examples in the test split, also as an int64 for batching
num_test_samples = mnist_info.splits['test'].num_examples
num_test_samples = tf.cast(num_test_samples, tf.int64)

# Shuffle ONCE, in a fixed order, before splitting.
# Dataset.shuffle() reshuffles on every iteration by default; splitting a
# reshuffling dataset with skip()/take() selects different examples each epoch,
# so validation examples would leak into training. A fixed seed together with
# reshuffle_each_iteration=False keeps the two subsets disjoint and stable.
SPLIT_SEED = 42
shuffle_train_and_validation_data = train_and_validation_data.shuffle(
    BUFFER_SIZE, seed=SPLIT_SEED, reshuffle_each_iteration=False
)

# Everything after the first num_validation_samples examples is training data
train_data = shuffle_train_and_validation_data.skip(num_validation_samples)

# The first num_validation_samples examples form the validation set
validation_data = shuffle_train_and_validation_data.take(num_validation_samples)

# Reshuffle only the training examples each epoch, then batch them
train_data = train_data.shuffle(BUFFER_SIZE).batch(BATCH_SIZE)

# No need to mini-batch the validation and test data (we do not backpropagate
# on them), so each is served as a single batch holding the whole set
validation_data = validation_data.batch(num_validation_samples)

test_data = test_data.batch(num_test_samples)

# Materialise the single validation batch as (inputs, targets) tensors
validation_inputs, validation_targets = next(iter(validation_data))




 

In [51]:
#Hyperparameters to tune, with the candidate values to try for each of them.
HP_FILTER_SIZE = hp.HParam('filter_size', hp.Discrete([3,5,7]))
HP_OPTIMIZER = hp.HParam('optimizer', hp.Discrete(['adam', 'sgd']))

# Name of the metric reported for every trial
METRIC_ACCURACY = 'accuracy'

# Write the experiment-level HParams configuration to the root log directory
hparam_root_writer = tf.summary.create_file_writer('logs/hparam_tuning')
with hparam_root_writer.as_default():
    tuned_hparams = [HP_FILTER_SIZE, HP_OPTIMIZER]
    reported_metrics = [hp.Metric(METRIC_ACCURACY, display_name='Accuracy')]
    hp.hparams_config(hparams=tuned_hparams, metrics=reported_metrics)

In [52]:
#CREATE THE MODEL AND TRAIN IT

def train_model(hparams):
    """Build, train and evaluate one CNN for a single hyperparameter setting.

    Args:
        hparams: Mapping that provides a value for ``HP_FILTER_SIZE`` (the
            kernel size of both convolution layers) and ``HP_OPTIMIZER``
            (the optimizer handed to ``compile``).

    Returns:
        The accuracy reported by ``model.evaluate`` on ``test_data``.

    NOTE(review): the returned value is the *test* accuracy, and callers use it
    to compare hyperparameter settings - consider reporting validation accuracy
    for model selection instead.
    """
    kernel_size = hparams[HP_FILTER_SIZE]

    # Two convolution + max-pooling stages, then a 10-unit linear layer that
    # outputs raw logits (no softmax; the loss below uses from_logits=True).
    network = Sequential()
    network.add(Conv2D(50, kernel_size, activation='relu', input_shape=(28, 28, 1)))
    network.add(MaxPooling2D(pool_size=(2, 2)))
    network.add(Conv2D(50, kernel_size, activation='relu'))
    network.add(MaxPooling2D(pool_size=(2, 2)))
    network.add(Flatten())
    network.add(Dense(10))

    network.compile(
        optimizer=hparams[HP_OPTIMIZER],
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=[METRIC_ACCURACY],
    )

    # Guard against overfitting: stop once val_loss has failed to improve for
    # two epochs in a row, and roll back to the best weights seen.
    stop_on_plateau = EarlyStopping(
        monitor='val_loss',
        mode='auto',
        min_delta=0,
        patience=2,
        verbose=0,
        restore_best_weights=True,
    )

    network.fit(
        train_data,
        epochs=NUM_EPOCHS,
        callbacks=[stop_on_plateau],
        validation_data=validation_data,
        verbose=2,
    )

    # evaluate() returns [loss, accuracy] for the metrics compiled above
    _test_loss, test_accuracy = network.evaluate(test_data)

    return test_accuracy
   

In [53]:
def run(log_dir, hparams):
    """Run one tuning trial and log its hyperparameters and accuracy.

    Args:
        log_dir: Directory that receives this trial's summary files.
        hparams: Hyperparameter values for the trial, passed to ``train_model``.
    """
    trial_writer = tf.summary.create_file_writer(log_dir)
    with trial_writer.as_default():
        # Record which hyperparameter values this trial used
        hp.hparams(hparams)
        trial_accuracy = train_model(hparams)
        tf.summary.scalar(METRIC_ACCURACY, trial_accuracy, step=1)

In [54]:
#Grid search: train one model for every (filter size, optimizer) combination
session_num = 0
for filter_size in HP_FILTER_SIZE.domain.values:
    for optimizer in HP_OPTIMIZER.domain.values:
        hparams = {HP_FILTER_SIZE: filter_size, HP_OPTIMIZER: optimizer}

        # Each trial logs to its own numbered sub-directory
        run_name = "run-%d" % session_num
        trial_log_dir = 'logs/hparam_tuning/' + run_name

        print('---Starting trial: %s' % run_name)
        print({param.name: value for param, value in hparams.items()})
        run(trial_log_dir, hparams)

        session_num += 1

        

---Starting trial: run-0
{'filter_size': 3, 'optimizer': 'adam'}
Epoch 1/20
422/422 - 61s - loss: 0.2971 - accuracy: 0.9150 - val_loss: 0.0957 - val_accuracy: 0.9722 - 61s/epoch - 144ms/step
Epoch 2/20
422/422 - 51s - loss: 0.0779 - accuracy: 0.9766 - val_loss: 0.0655 - val_accuracy: 0.9780 - 51s/epoch - 122ms/step
Epoch 3/20
422/422 - 55s - loss: 0.0586 - accuracy: 0.9821 - val_loss: 0.0442 - val_accuracy: 0.9875 - 55s/epoch - 129ms/step
Epoch 4/20
422/422 - 55s - loss: 0.0475 - accuracy: 0.9850 - val_loss: 0.0426 - val_accuracy: 0.9858 - 55s/epoch - 129ms/step
Epoch 5/20
422/422 - 59s - loss: 0.0411 - accuracy: 0.9878 - val_loss: 0.0348 - val_accuracy: 0.9907 - 59s/epoch - 140ms/step
Epoch 6/20
422/422 - 60s - loss: 0.0349 - accuracy: 0.9892 - val_loss: 0.0210 - val_accuracy: 0.9935 - 60s/epoch - 143ms/step
Epoch 7/20
422/422 - 60s - loss: 0.0316 - accuracy: 0.9900 - val_loss: 0.0263 - val_accuracy: 0.9920 - 60s/epoch - 142ms/step
Epoch 8/20
422/422 - 54s - loss: 0.0270 - accuracy: 0

Epoch 5/20
422/422 - 40s - loss: 0.0292 - accuracy: 0.9909 - val_loss: 0.0268 - val_accuracy: 0.9925 - 40s/epoch - 95ms/step
Epoch 6/20
422/422 - 41s - loss: 0.0257 - accuracy: 0.9922 - val_loss: 0.0142 - val_accuracy: 0.9960 - 41s/epoch - 97ms/step
Epoch 7/20
422/422 - 40s - loss: 0.0219 - accuracy: 0.9932 - val_loss: 0.0138 - val_accuracy: 0.9963 - 40s/epoch - 96ms/step
Epoch 8/20
422/422 - 40s - loss: 0.0179 - accuracy: 0.9942 - val_loss: 0.0192 - val_accuracy: 0.9945 - 40s/epoch - 95ms/step
Epoch 9/20
422/422 - 41s - loss: 0.0147 - accuracy: 0.9954 - val_loss: 0.0137 - val_accuracy: 0.9955 - 41s/epoch - 96ms/step
Epoch 10/20
422/422 - 41s - loss: 0.0123 - accuracy: 0.9959 - val_loss: 0.0107 - val_accuracy: 0.9975 - 41s/epoch - 97ms/step
Epoch 11/20
422/422 - 40s - loss: 0.0108 - accuracy: 0.9963 - val_loss: 0.0095 - val_accuracy: 0.9972 - 40s/epoch - 96ms/step
Epoch 12/20
422/422 - 40s - loss: 0.0084 - accuracy: 0.9974 - val_loss: 0.0074 - val_accuracy: 0.9973 - 40s/epoch - 95ms/st

In [49]:
# model.summary(line_length=75)

Model: "sequential_5"
___________________________________________________________________________
 Layer (type)                    Output Shape                  Param #     
 conv2d_10 (Conv2D)              (None, 24, 24, 50)            1300        
                                                                           
 max_pooling2d_10 (MaxPooling2D)  (None, 12, 12, 50)           0           
                                                                           
 conv2d_11 (Conv2D)              (None, 10, 10, 50)            22550       
                                                                           
 max_pooling2d_11 (MaxPooling2D)  (None, 5, 5, 50)             0           
                                                                           
 flatten_5 (Flatten)             (None, 1250)                  0           
                                                                           
 dense_5 (Dense)                 (None, 10)                    125

In [67]:
# # Create a Confusion Matrix

# for images, labels in validation_data:
#     val_images = images.numpy()
#     val_labels = labels.numpy()
    
# val_predict_raw= model.predict(val_images)
# val_predict = np.argmax(val_predict_raw, axis =1)


# cm = confusion_matrix(val_labels, val_predict)

# # Define class labels
# class_labels = ["Class 0", "Class 1", "Class 2","Class 3","Class 4","Class 5","Class 6","Class 7","Class 8","Class 9"]

# # Plot the confusion matrix using Seaborn
# plt.figure(figsize=(6, 6))
# sns.set(font_scale=1.2)  # Adjust the font size
# sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", square=True,
#             xticklabels=class_labels,
#             yticklabels=class_labels)


# # disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_labels)
# # disp.plot(cmap="Blues", values_format="d")

# plt.xlabel('Predicted')
# plt.ylabel('Actual')
# plt.title('Confusion Matrix')
# plt.show()



In [57]:
# #VISUALIZING IN TENSORBOARD BY LOGGING EACH RUN TO ITS OWN TIMESTAMPED DIRECTORY

# #Get the current date and time as a string
# current_datetime = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
# 
# #Create a log directory to create a directory of each run using the current date and time.

# log_dir = f'./logs/{current_datetime}'

# #create a tensorboard callback
# tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir,histogram_freq=1)



Epoch 1/20
422/422 - 45s - loss: 0.2633 - accuracy: 0.9249 - val_loss: 0.0903 - val_accuracy: 0.9740 - 45s/epoch - 106ms/step
Epoch 2/20
422/422 - 44s - loss: 0.0725 - accuracy: 0.9787 - val_loss: 0.0528 - val_accuracy: 0.9847 - 44s/epoch - 104ms/step
Epoch 3/20
422/422 - 43s - loss: 0.0528 - accuracy: 0.9840 - val_loss: 0.0373 - val_accuracy: 0.9905 - 43s/epoch - 101ms/step
Epoch 4/20
422/422 - 44s - loss: 0.0432 - accuracy: 0.9871 - val_loss: 0.0348 - val_accuracy: 0.9890 - 44s/epoch - 104ms/step
Epoch 5/20
422/422 - 43s - loss: 0.0371 - accuracy: 0.9891 - val_loss: 0.0302 - val_accuracy: 0.9912 - 43s/epoch - 101ms/step
Epoch 6/20
422/422 - 43s - loss: 0.0329 - accuracy: 0.9901 - val_loss: 0.0335 - val_accuracy: 0.9882 - 43s/epoch - 102ms/step
Epoch 7/20
422/422 - 44s - loss: 0.0286 - accuracy: 0.9910 - val_loss: 0.0240 - val_accuracy: 0.9928 - 44s/epoch - 104ms/step
Epoch 8/20
422/422 - 44s - loss: 0.0239 - accuracy: 0.9924 - val_loss: 0.0167 - val_accuracy: 0.9950 - 44s/epoch - 103

<keras.callbacks.History at 0x2b1c9c0bd30>

In [63]:
# #Test the model
# test_loss, test_accuracy = model.evaluate(test_data)

# print(f'Test Loss: {round(test_loss,2)}. Test Accuracy:{round(test_accuracy*100,2)}%')

In [64]:

# #PLOT THE IMAGE AND RESULT

# # Get images from the test dataset. n=1 takes one batch, which holds the whole test set
# num_images_to_display = 1
# for image, label in test_data.take(num_images_to_display):
#     sample_image = image.numpy()# Convert to numpy array and remove singleton dimensions
#     sample_label = label.numpy()

# original_images = sample_image.reshape(10000, 28, 28)

# # Display the image using matplotlib
# image_number_to_display = 456

# plt.figure(figsize=(2,2))
# plt.imshow(sample_image[image_number_to_display], cmap='gray', aspect="auto")
# plt.title(f'Label: {sample_label[image_number_to_display]}')
# plt.axis('off')  # Turn off axis
# plt.show()

In [65]:
# #Obtain the model's prediction(logits)

# #Using slicing is important because tensorflow expects inputs to be in batches.
# predictions = model.predict(sample_image[image_number_to_display:image_number_to_display+1])

# #Convert the predictions into probabilities and to percentage
# probabilities = (tf.nn.softmax(predictions).numpy()) *100

# #Create a bar chart to plot the probabilities for each class
# plt.figure(figsize=(12,5))
# plt.bar(x=[1,2,3,4,5,6,7,8,9,10], height=probabilities[0], tick_label=["0", "1","2","3","4","5","6","7","8","9"])

In [62]:
#Load Tensorboard using the commands
%load_ext tensorboard
%tensorboard --logdir "logs/hparam_tuning"



The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard
