In [1]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.datasets import mnist  # Load MNIST dataset

# Load MNIST, flatten every image into a 784-element float32 vector and
# divide by 255 to rescale the pixel values.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(-1, 784).astype(np.float32) / 255.0
x_test = x_test.reshape(-1, 784).astype(np.float32) / 255.0

# Convert integer labels to one-hot rows by indexing into an identity matrix.
# The matrix is created as float32 so the labels share the dtype of the
# inputs and logits instead of NumPy's default float64.
num_classes = 10
y_train = np.eye(num_classes, dtype=np.float32)[y_train]
y_test = np.eye(num_classes, dtype=np.float32)[y_test]

# Layer sizes of the fully connected network
n_input = 28 * 28  # one input unit per pixel of a 28x28 image
n_hidden1 = 128    # width of the first hidden layer
n_hidden2 = 64     # width of the second hidden layer
n_output = 10      # one output unit per digit

# Optimisation settings
learning_rate = 0.01  # step size handed to the optimizer
n_epochs = 10         # number of passes over the training set
batch_size = 100      # samples per gradient step

# Trainable parameters.
# Each weight matrix is drawn from a normal distribution with stddev 0.1 and
# has shape [fan_in, fan_out]; the entries are created in the order h1, h2, out.
weights = {
    name: tf.Variable(tf.random.normal(shape, stddev=0.1))
    for name, shape in (
        ('h1', [n_input, n_hidden1]),
        ('h2', [n_hidden1, n_hidden2]),
        ('out', [n_hidden2, n_output]),
    )
}

# Every bias vector starts at zero, with one entry per unit of its layer.
biases = {
    name: tf.Variable(tf.zeros([width]))
    for name, width in (
        ('b1', n_hidden1),
        ('b2', n_hidden2),
        ('out', n_output),
    )
}

# Sigmoid activation
def sigmoid(x):
    """Return the element-wise sigmoid of ``x``."""
    return tf.math.sigmoid(x)

# Feedforward pass through the network
def neural_network(x):
    """Run ``x`` through both sigmoid hidden layers and return raw logits.

    The parameters are read from the module-level ``weights`` and ``biases``
    dictionaries. No softmax is applied to the output layer.
    """
    activations = x
    # Hidden layers: affine transform followed by the sigmoid activation.
    for w_key, b_key in (('h1', 'b1'), ('h2', 'b2')):
        pre_activation = tf.matmul(activations, weights[w_key]) + biases[b_key]
        activations = sigmoid(pre_activation)

    # Output layer: affine transform only, leaving unnormalised logits.
    return tf.matmul(activations, weights['out']) + biases['out']

# Loss function: softmax cross-entropy
def compute_loss(y_true, y_pred):
    """Return the mean softmax cross-entropy of logits ``y_pred`` against labels ``y_true``."""
    per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
        labels=y_true,
        logits=y_pred,
    )
    return tf.reduce_mean(per_example_loss)

# Optimizer
optimizer = tf.optimizers.Adam(learning_rate=learning_rate)

# Shuffle the training pairs and group them into mini-batches. The shuffle
# buffer is sized from the data itself rather than a hard-coded 60000, so the
# whole training set is still shuffled uniformly if its size ever changes.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .shuffle(buffer_size=len(x_train))
    .batch(batch_size)
)

# Training loop
# The list of trainable variables and the number of batches do not change
# during training, so they are computed once instead of on every step.
trainable_vars = list(weights.values()) + list(biases.values())
num_batches = len(train_dataset)

for epoch in range(n_epochs):
    avg_loss = 0.0
    for batch_x, batch_y in train_dataset:
        # Record the forward pass so the loss can be differentiated.
        with tf.GradientTape() as tape:
            predictions = neural_network(batch_x)
            loss = compute_loss(batch_y, predictions)

        gradients = tape.gradient(loss, trainable_vars)
        optimizer.apply_gradients(zip(gradients, trainable_vars))
        # Accumulate the mean of the per-batch losses for this epoch.
        avg_loss += loss.numpy() / num_batches

    # Compute training accuracy: fraction of training samples whose largest
    # logit is at the same index as the 1 in the one-hot label.
    correct_preds = tf.equal(tf.argmax(neural_network(x_train), axis=1), tf.argmax(y_train, axis=1))
    train_acc = tf.reduce_mean(tf.cast(correct_preds, tf.float32)).numpy()
    print(f"Epoch {epoch+1}, Loss: {avg_loss:.4f}, Training Accuracy: {train_acc:.4f}")

# Evaluate model on test data: a prediction counts as correct when the index
# of the largest logit equals the index of the 1 in the one-hot label.
test_preds = neural_network(x_test)
correct_test_preds = tf.math.equal(
    tf.math.argmax(test_preds, axis=1),
    tf.math.argmax(y_test, axis=1),
)
# Casting the boolean matches to float32 makes their mean the accuracy.
test_hit_rate = tf.math.reduce_mean(tf.cast(correct_test_preds, tf.float32))
test_acc = test_hit_rate.numpy()
print(f"Test Accuracy: {test_acc:.4f}")


Epoch 1, Loss: 0.2865, Training Accuracy: 0.9613
Epoch 2, Loss: 0.1123, Training Accuracy: 0.9742
Epoch 3, Loss: 0.0852, Training Accuracy: 0.9762
Epoch 4, Loss: 0.0669, Training Accuracy: 0.9834
Epoch 5, Loss: 0.0559, Training Accuracy: 0.9837
Epoch 6, Loss: 0.0520, Training Accuracy: 0.9848
Epoch 7, Loss: 0.0473, Training Accuracy: 0.9855
Epoch 8, Loss: 0.0440, Training Accuracy: 0.9904
Epoch 9, Loss: 0.0415, Training Accuracy: 0.9894
Epoch 10, Loss: 0.0378, Training Accuracy: 0.9889
Test Accuracy: 0.9740


Scope for improvement:

Replace the sigmoid activations in the hidden layers with a better-behaved activation such as ReLU (Rectified Linear Unit), which is computationally cheaper and does not saturate for positive inputs, so it is less prone to vanishing gradients.