In [None]:
import tensorflow as tf
import numpy as np
import time
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import seaborn as sns
import os
from google.colab import drive  #comment while using storing in local directory..

# Mount Google Drive at /content/drive so the results can be written to it.
drive.mount('/content/drive')  # Colab-only: `drive` is imported from google.colab
output_dir = "/content/drive/My Drive/mnist_results"  # folder that receives the plots and summaries
os.makedirs(output_dir, exist_ok=True)  # create the folder if missing; no error if it already exists

# To run on a local machine instead of Colab, comment out the Drive lines above
# (including the google.colab import) and uncomment the two lines below:
# output_dir = "mnist_results"
# os.makedirs(output_dir, exist_ok=True)

# Disable eager execution: the model below is built as a static graph with
# tf.compat.v1 placeholders and executed through tf.compat.v1.Session.
tf.compat.v1.disable_eager_execution()

# Load the MNIST dataset.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Flatten every image to a 784-element row and scale the pixel values by 1/255.
# The arrays are cast to float32 once here because they are only ever fed into
# float32 placeholders; leaving them as float64 doubles their memory footprint
# and forces a dtype conversion on every feed.
x_train = x_train.reshape(-1, 784).astype(np.float32) / 255.0
x_test = x_test.reshape(-1, 784).astype(np.float32) / 255.0

# One-hot encode the labels: row k of the 10x10 identity matrix is the
# encoding of class k, so indexing it with the label array encodes every label.
y_train_one_hot = np.eye(10, dtype=np.float32)[y_train]
y_test_one_hot = np.eye(10, dtype=np.float32)[y_test]

# ---- Model hyperparameters ----

# Network shape
input_size = 784   # values per flattened input image
output_size = 10   # number of output classes
# Hidden layer widths to compare, one model per width.
# NOTE(review): 125 sits between two powers of two; it may have been meant as 128 - confirm.
hidden_sizes = [256, 125, 64]

# Activation functions to compare, keyed by the name used in logs and file names.
activations = {
    'sigmoid': tf.nn.sigmoid,
    'relu': tf.nn.relu,
    'tanh': tf.nn.tanh,
}

# Optimisation settings
learning_rate = 0.001
batch_size = 100
epochs = 100
dropout_rate = 0.5  # fraction of hidden units dropped during training to limit overfitting


# Train and evaluate one single-hidden-layer classifier for every combination
# of hidden layer size and activation function. For each combination the loss
# curve, training-accuracy curve, confusion matrix and a text summary are
# written to output_dir.
for hidden_size in hidden_sizes:
    for act_name, activation in activations.items():
        print(f"\nTraining with Hidden Size: {hidden_size}, Activation: {act_name}\n")

        # Build every configuration in its own graph. Adding all models to the
        # shared default graph would keep the variables and ops of earlier
        # configurations alive, so the graph (and the work done by the global
        # initializer) would grow with each iteration.
        graph = tf.Graph()
        with graph.as_default():
            # Placeholders are filled through feed_dict at run time; the None
            # dimension lets the number of rows vary between calls.
            X = tf.compat.v1.placeholder(tf.float32, [None, input_size])
            y = tf.compat.v1.placeholder(tf.float32, [None, output_size])
            # Probability of keeping a hidden unit: 1 - dropout_rate while
            # training, 1.0 (dropout disabled) while evaluating.
            keep_prob = tf.compat.v1.placeholder(tf.float32)

            # Weights start as small truncated-normal values (stddev 0.1,
            # centred on zero); biases start at zero.
            weights = {
                'w1': tf.Variable(tf.random.truncated_normal([input_size, hidden_size], stddev=0.1)),
                'w2': tf.Variable(tf.random.truncated_normal([hidden_size, output_size], stddev=0.1)),
            }
            biases = {
                'b1': tf.Variable(tf.zeros([hidden_size])),
                'b2': tf.Variable(tf.zeros([output_size])),
            }

            # Hidden layer -> dropout -> linear output layer. The logits are
            # unnormalised class scores; the softmax is applied inside the loss.
            layer1 = activation(tf.matmul(X, weights['w1']) + biases['b1'])
            layer1_drop = tf.nn.dropout(layer1, rate=1 - keep_prob)
            logits = tf.matmul(layer1_drop, weights['w2']) + biases['b2']

            # Loss, optimizer, predictions and accuracy
            loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits))
            optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)
            predictions = tf.argmax(logits, 1)
            correct_pred = tf.equal(predictions, tf.argmax(y, 1))
            accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

            init_op = tf.compat.v1.global_variables_initializer()

        # Per-epoch training metrics, used for the curves below.
        losses, accuracies = [], []
        start_time = time.time()

        with tf.compat.v1.Session(graph=graph) as sess:
            sess.run(init_op)  # assign the initial values defined above

            for epoch in range(epochs):
                # One pass over the training set in consecutive mini-batches,
                # with dropout active.
                for start in range(0, len(x_train), batch_size):
                    batch_x = x_train[start:start + batch_size]
                    batch_y = y_train_one_hot[start:start + batch_size]
                    sess.run(optimizer, feed_dict={X: batch_x, y: batch_y, keep_prob: 1 - dropout_rate})

                # Evaluate on the full training and test sets with dropout off.
                train_loss, train_acc = sess.run(
                    [loss, accuracy],
                    feed_dict={X: x_train, y: y_train_one_hot, keep_prob: 1.0},
                )
                losses.append(train_loss)
                accuracies.append(train_acc)
                test_acc = sess.run(accuracy, feed_dict={X: x_test, y: y_test_one_hot, keep_prob: 1.0})
                print(f"Epoch {epoch+1}, Loss: {train_loss:.4f}, Train Acc: {train_acc*100:.2f}%, Test Acc: {test_acc*100:.2f}%")

            # Final test accuracy and predictions in a single run. Computing the
            # accuracy here (rather than reusing the last epoch's value) keeps
            # it defined even when epochs is 0.
            test_acc, final_test_preds = sess.run(
                [accuracy, predictions],
                feed_dict={X: x_test, y: y_test_one_hot, keep_prob: 1.0},
            )
            execution_time = time.time() - start_time

        # The session is closed from here on; only NumPy results are needed.
        print(f"Final Test Accuracy: {test_acc*100:.2f}%")
        print(f"Execution Time: {execution_time:.2f} seconds")

        # Save Loss Curve
        plt.figure()
        plt.plot(losses, label='Loss')
        plt.xlabel('Epochs')
        plt.ylabel('Loss')
        plt.title(f'Loss Curve ({act_name}, {hidden_size})')
        plt.legend()
        loss_filename = os.path.join(output_dir, f"loss_curve_{act_name}_{hidden_size}.png")
        plt.savefig(loss_filename)
        plt.close()

        # Save Accuracy Curve
        plt.figure()
        plt.plot(accuracies, label='Train Accuracy')
        plt.xlabel('Epochs')
        plt.ylabel('Accuracy')
        plt.title(f'Accuracy Curve ({act_name}, {hidden_size})')
        plt.legend()
        acc_filename = os.path.join(output_dir, f"accuracy_curve_{act_name}_{hidden_size}.png")
        plt.savefig(acc_filename)
        plt.close()

        # Save Confusion Matrix (rows: true digit, columns: predicted digit)
        plt.figure()
        cm = confusion_matrix(y_test, final_test_preds)
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=range(10), yticklabels=range(10))
        plt.xlabel('Predicted Label')
        plt.ylabel('True Label')
        plt.title(f'Confusion Matrix ({act_name}, {hidden_size})')
        cm_filename = os.path.join(output_dir, f"confusion_matrix_{act_name}_{hidden_size}.png")
        plt.savefig(cm_filename)
        plt.close()

        # Save execution details to a text file
        txt_filename = os.path.join(output_dir, f"summary_{act_name}_{hidden_size}.txt")
        with open(txt_filename, "w", encoding="utf-8") as f:
            f.write(f"Activation Function: {act_name}\n")
            f.write(f"Hidden Layer Size: {hidden_size}\n")
            f.write(f"Final Test Accuracy: {test_acc*100:.2f}%\n")
            f.write(f"Execution Time: {execution_time:.2f} seconds\n")
            f.write(f"Confusion Matrix:\n{cm}\n")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).

Training with Hidden Size: 256, Activation: sigmoid

Epoch 1, Loss: 0.3005, Train Acc: 91.33%, Test Acc: 91.54%
Epoch 2, Loss: 0.2291, Train Acc: 93.35%, Test Acc: 93.31%
Epoch 3, Loss: 0.1889, Train Acc: 94.50%, Test Acc: 94.31%
Epoch 4, Loss: 0.1582, Train Acc: 95.29%, Test Acc: 95.15%
Epoch 5, Loss: 0.1363, Train Acc: 95.96%, Test Acc: 95.75%
Epoch 6, Loss: 0.1190, Train Acc: 96.53%, Test Acc: 96.11%
Epoch 7, Loss: 0.1048, Train Acc: 96.95%, Test Acc: 96.46%
Epoch 8, Loss: 0.0941, Train Acc: 97.32%, Test Acc: 96.76%
Epoch 9, Loss: 0.0841, Train Acc: 97.59%, Test Acc: 96.96%
Epoch 10, Loss: 0.0751, Train Acc: 97.83%, Test Acc: 97.23%
Epoch 11, Loss: 0.0694, Train Acc: 97.99%, Test Acc: 97.34%
Epoch 12, Loss: 0.0634, Train Acc: 98.24%, Test Acc: 97.38%
Epoch 13, Loss: 0.0577, Train Acc: 98.36%, Test Acc: 97.46%
Epoch 14, Loss: 0.0551, Train Acc: 98.40%, Tes