## Write a program (WAP) to evaluate the performance of the implemented three-layer neural network under variations in activation function, hidden-layer size, learning rate, batch size, and number of epochs.

In [None]:
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import time
from sklearn.metrics import confusion_matrix

In [None]:
# Enable incremental GPU memory allocation when at least one GPU is visible;
# otherwise report that the run falls back to the CPU.
gpus = tf.config.list_physical_devices('GPU')
if not gpus:
    print("No GPU found. Running on CPU.")
else:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print("GPU is available and memory growth is enabled.")
    except RuntimeError as e:
        # Report the failure and carry on with the default memory policy.
        print(e)

# The rest of the notebook uses the TF1-style graph/session API.
tf.compat.v1.disable_eager_execution()

# MNIST splits plus dataset metadata; as_supervised yields (image, label) pairs.
mnist, info = tfds.load('mnist', as_supervised=True, with_info=True)

# Hyperparameters shared by every training run
batch_size, epochs = 100, 50



GPU is available and memory growth is enabled.


In [None]:
# Activation names under study (the code shown here only ever applies ReLU).
activations = ['relu']

# (first hidden layer width, second hidden layer width) combinations to sweep.
hidden_layer_sizes_double_layer = [
    (160, 100),
    (100, 160),
    (100, 100),
    (100, 60),
    (60, 60),
]

# Optimizer learning rates to sweep for every layer configuration.
learning_rates = [
    0.1,
    0.01,
    0.001,
]

def activation_function(x):
    """Apply the ReLU non-linearity to tensor ``x`` and return the result."""
    activated = tf.nn.relu(x)
    return activated

# Dataset Prepare
def preprocess(image, label):
    """Turn one (image, label) example into a model-ready pair.

    The image is reshaped to a 784-element vector and scaled to float32 values
    in [0, 1] by dividing by 255; the label is one-hot encoded over 10 classes.
    """
    flat_pixels = tf.reshape(image, [784])
    scaled_pixels = tf.cast(flat_pixels, tf.float32) / 255.0
    one_hot_label = tf.one_hot(label, 10)
    return scaled_pixels, one_hot_label

# training function
def train_and_evaluate(hidden_layers, learning_rate):
    """Train a fully connected MNIST classifier and evaluate it on the test split.

    Builds a network with one ReLU layer per entry of ``hidden_layers`` followed
    by a 10-way linear output layer, trains it with Adam for the module-level
    ``epochs`` using mini-batches of the module-level ``batch_size``, then
    scores it on ``mnist['test']``.

    Args:
        hidden_layers: Iterable of hidden-layer widths, e.g. ``(160, 100)``.
        learning_rate: Learning rate passed to the Adam optimizer.

    Returns:
        A tuple ``(loss_history, accuracy_history, test_acc, cm, execution_time)``:
        per-batch training losses, per-batch training accuracies, the fraction
        of correctly classified test samples, the test confusion matrix, and
        the wall-clock seconds spent in this call.
    """
    start_time = time.time()  # Timer covers graph construction, training and evaluation

    autotune = tf.data.experimental.AUTOTUNE

    # cache() must come BEFORE shuffle(): caching an already shuffled/batched
    # dataset freezes the first epoch's order, so every later epoch would
    # replay exactly the same batches.
    train_data = (
        mnist['train']
        .map(preprocess)
        .cache()
        .shuffle(60000)
        .batch(batch_size)
        .prefetch(autotune)
    )
    test_data = mnist['test'].map(preprocess).batch(batch_size).cache().prefetch(autotune)

    # Build both iterators up front so no graph ops are created after the
    # session has started.
    iterator = tf.compat.v1.data.make_initializable_iterator(train_data)
    next_element = iterator.get_next()
    test_iterator = tf.compat.v1.data.make_initializable_iterator(test_data)
    next_test_element = test_iterator.get_next()

    # Placeholders
    X = tf.compat.v1.placeholder(tf.float32, [None, 784])
    Y = tf.compat.v1.placeholder(tf.float32, [None, 10])

    # Initialize weights and biases
    weights = {}
    biases = {}
    prev_size = 784

    # Use the first GPU when one is visible, otherwise fall back to the CPU
    # instead of pinning ops to a device that does not exist.
    device_name = '/GPU:0' if tf.config.list_physical_devices('GPU') else '/CPU:0'

    # network structure
    with tf.device(device_name):

        layer = X
        for i, size in enumerate(hidden_layers):
            weights[f'h{i+1}'] = tf.Variable(tf.random.normal([prev_size, size]))
            biases[f'b{i+1}'] = tf.Variable(tf.random.normal([size]))
            layer = activation_function(tf.add(tf.matmul(layer, weights[f'h{i+1}']), biases[f'b{i+1}']))
            prev_size = size

        # Output layer
        weights['out'] = tf.Variable(tf.random.normal([prev_size, 10]))
        biases['out'] = tf.Variable(tf.random.normal([10]))
        logits = tf.add(tf.matmul(layer, weights['out']), biases['out'])

        # Loss and optimizer
        loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y))
        optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=learning_rate)
        train_op = optimizer.minimize(loss_op)

        # Predictions and accuracy. The prediction op is created once here;
        # creating it inside the evaluation loop would add a new node to the
        # graph for every test batch.
        pred_op = tf.argmax(logits, 1)
        correct_pred = tf.equal(pred_op, tf.argmax(Y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    # NOTE(review): every call adds its ops and variables to the default graph,
    # and the initializer below also re-initializes variables left over from
    # earlier calls. The graph is not reset here because the module-level
    # `mnist` datasets appear to live in that same graph -- confirm before
    # changing.
    init = tf.compat.v1.global_variables_initializer()
    loss_history = []
    accuracy_history = []

    with tf.compat.v1.Session() as sess:
        sess.run(init)
        sess.run(iterator.initializer)

        for epoch in range(epochs):
            try:
                while True:
                    batch_x, batch_y = sess.run(next_element)
                    _, loss, acc = sess.run([train_op, loss_op, accuracy], feed_dict={X: batch_x, Y: batch_y})
                    loss_history.append(loss)
                    accuracy_history.append(acc)
            except tf.errors.OutOfRangeError:
                # End of the epoch: rewind (and reshuffle) the training data.
                sess.run(iterator.initializer)

        print(f"Completed: Layers={hidden_layers}, LR={learning_rate}")

        # Evaluation on test data
        sess.run(test_iterator.initializer)
        y_true, y_pred = [], []

        while True:
            try:
                test_images, test_labels = sess.run(next_test_element)
            except tf.errors.OutOfRangeError:
                break
            preds = sess.run(pred_op, feed_dict={X: test_images})
            y_true.extend(np.argmax(test_labels, axis=1))
            y_pred.extend(preds)

        # Per-sample accuracy: unlike a mean of per-batch accuracies, this is
        # not skewed when the last batch is smaller than batch_size.
        test_acc = float(np.mean(np.asarray(y_true) == np.asarray(y_pred)))

        cm = confusion_matrix(y_true, y_pred)

        # Execution time
        execution_time = time.time() - start_time

        return loss_history, accuracy_history, test_acc, cm, execution_time



In [None]:
# Column labels for the summary table, one per entry of each result row.
columns = [
    "Hidden Layers",
    "Learning Rate",
    "Final Loss",
    "Final Accuracy",
    "Test Accuracy",
    "Confusion Matrix",
    "Execution Time",
]

# Train one model per (hidden layer sizes, learning rate) combination.
# "Final Loss" / "Final Accuracy" are the values of the last training batch.
results_double_layer = []
for hidden_sizes in hidden_layer_sizes_double_layer:
    for lr in learning_rates:
        loss_hist, acc_hist, test_acc, cm, exec_time = train_and_evaluate(hidden_sizes, lr)
        row = [hidden_sizes, lr, loss_hist[-1], acc_hist[-1], test_acc, cm, exec_time]
        results_double_layer.append(row)

df_double_layer = pd.DataFrame(results_double_layer, columns=columns)

print("\nDouble Layer Results:")
display(df_double_layer)

Completed: Layers=(160, 100), LR=0.1
Completed: Layers=(160, 100), LR=0.01
Completed: Layers=(160, 100), LR=0.001
Completed: Layers=(100, 160), LR=0.1
Completed: Layers=(100, 160), LR=0.01
Completed: Layers=(100, 160), LR=0.001
Completed: Layers=(100, 100), LR=0.1
Completed: Layers=(100, 100), LR=0.01
Completed: Layers=(100, 100), LR=0.001
Completed: Layers=(100, 60), LR=0.1
Completed: Layers=(100, 60), LR=0.01
Completed: Layers=(100, 60), LR=0.001
Completed: Layers=(60, 60), LR=0.1
Completed: Layers=(60, 60), LR=0.01
Completed: Layers=(60, 60), LR=0.001

Double Layer Results:


Unnamed: 0,Hidden Layers,Learning Rate,Final Loss,Final Accuracy,Test Accuracy,Confusion Matrix,Execution Time
0,"(160, 100)",0.1,1.854606,0.16,0.1892,"[[977, 0, 0, 0, 0, 0, 0, 3, 0, 0], [1134, 0, 0...",359.191833
1,"(160, 100)",0.01,0.0219,0.99,0.9713,"[[968, 0, 1, 1, 0, 1, 2, 0, 6, 1], [0, 1123, 2...",369.017422
2,"(160, 100)",0.001,0.0,1.0,0.9549,"[[963, 1, 1, 3, 1, 1, 2, 4, 2, 2], [2, 1116, 4...",355.799519
3,"(100, 160)",0.1,1.910809,0.15,0.1754,"[[0, 0, 0, 0, 0, 957, 0, 0, 0, 23], [0, 0, 0, ...",385.833386
4,"(100, 160)",0.01,0.009795,1.0,0.9697,"[[966, 0, 2, 0, 1, 0, 2, 0, 9, 0], [0, 1118, 2...",352.839713
5,"(100, 160)",0.001,0.21999,0.99,0.9551,"[[960, 0, 3, 5, 1, 1, 4, 1, 3, 2], [1, 1115, 3...",360.344357
6,"(100, 100)",0.1,1.782551,0.2,0.1905,"[[0, 0, 0, 0, 0, 0, 979, 1, 0, 0], [0, 0, 0, 0...",404.978854
7,"(100, 100)",0.01,0.005613,1.0,0.97,"[[965, 0, 2, 0, 0, 3, 3, 2, 4, 1], [0, 1121, 1...",400.011591
8,"(100, 100)",0.001,0.00115,1.0,0.9495,"[[964, 0, 4, 1, 1, 5, 2, 2, 1, 0], [2, 1112, 6...",412.699556
9,"(100, 60)",0.1,2.317551,0.11,0.1135,"[[0, 980, 0, 0, 0, 0, 0, 0, 0, 0], [0, 1135, 0...",431.826434


My Comments

1) No regularization is used to prevent overfitting.  
2) Other activation functions, such as sigmoid or LeakyReLU, could be tried.  
3) The model is always trained for 50 epochs, with no early stopping.  
4) A smaller batch size (e.g., 32) could also be tried.