## Week - 10
### Ex 10 - Neural networks

In [1]:
import numpy as np
from tensorflow.keras.datasets import mnist

In [2]:
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

train_images = train_images / 255.0
test_images = test_images / 255.0

train_images = train_images.reshape((-1, 28 * 28))
test_images = test_images.reshape((-1, 28 * 28))

train_labels_onehot = np.eye(10)[train_labels]
test_labels_onehot = np.eye(10)[test_labels]

input_size = 28 * 28
hidden_size = 512
output_size = 10

In [3]:
def xavier_init(input_size, output_size):
    limit = np.sqrt(6 / (input_size + output_size))
    return np.random.uniform(-limit, limit, (input_size, output_size))

W1 = xavier_init(input_size, hidden_size)
b1 = np.zeros((1, hidden_size))

W2 = xavier_init(hidden_size, output_size)
b2 = np.zeros((1, output_size))

def relu(x):
    return np.maximum(0, x)

def softmax(x):
    e_x = np.exp(x - np.max(x, axis=1, keepdims=True))
    return e_x / np.sum(e_x, axis=1, keepdims=True)

def compute_loss(y_pred, y_true):
    m = y_true.shape[0]
    log_likelihood = -np.log(y_pred[range(m), np.argmax(y_true, axis=1)])
    loss = np.sum(log_likelihood) / m
    return loss

def backpropagation(X, y_true, a1, a2):
    m = X.shape[0]

    dZ2 = a2 - y_true
    dW2 = np.dot(a1.T, dZ2) / m
    db2 = np.sum(dZ2, axis=0, keepdims=True) / m

    dA1 = np.dot(dZ2, W2.T)
    dZ1 = dA1 * (a1 > 0)  # ReLU derivative
    dW1 = np.dot(X.T, dZ1) / m
    db1 = np.sum(dZ1, axis=0, keepdims=True) / m

    return dW1, db1, dW2, db2

def forward_pass(X):
    z1 = np.dot(X, W1) + b1
    a1 = relu(z1)
    z2 = np.dot(a1, W2) + b2
    a2 = softmax(z2)
    return a1, a2

In [4]:
def train(X_train, y_train, X_test, y_test, epochs=20, batch_size=64, learning_rate=0.001):
    global W1, b1, W2, b2

    num_samples = X_train.shape[0]

    for epoch in range(epochs):
        indices = np.random.permutation(num_samples)
        X_train = X_train[indices]
        y_train = y_train[indices]

        for i in range(0, num_samples, batch_size):
            X_batch = X_train[i:i+batch_size]
            y_batch = y_train[i:i+batch_size]

            z1 = np.dot(X_batch, W1) + b1
            a1 = relu(z1)
            z2 = np.dot(a1, W2) + b2
            a2 = softmax(z2)

            loss = compute_loss(a2, y_batch)

            dW1, db1, dW2, db2 = backpropagation(X_batch, y_batch, a1, a2)

            W1 -= learning_rate * dW1
            b1 -= learning_rate * db1
            W2 -= learning_rate * dW2
            b2 -= learning_rate * db2

        if (epoch + 1) % 1 == 0:
            print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss:.4f}")

            _, a2_test = forward_pass(X_test)
            predictions = np.argmax(a2_test, axis=1)
            accuracy = np.mean(predictions == np.argmax(y_test, axis=1))
            print(f"Test Accuracy after epoch {epoch + 1}: {accuracy * 100:.2f}%")

train(train_images, train_labels_onehot, test_images, test_labels_onehot, epochs=20, batch_size=64, learning_rate=0.001)

Epoch 1/20, Loss: 1.3342
Test Accuracy after epoch 1: 72.11%
Epoch 2/20, Loss: 1.1018
Test Accuracy after epoch 2: 80.84%
Epoch 3/20, Loss: 0.8175
Test Accuracy after epoch 3: 83.84%
Epoch 4/20, Loss: 0.8876
Test Accuracy after epoch 4: 85.46%
Epoch 5/20, Loss: 0.6084
Test Accuracy after epoch 5: 86.71%
Epoch 6/20, Loss: 0.5309
Test Accuracy after epoch 6: 87.52%
Epoch 7/20, Loss: 0.4861
Test Accuracy after epoch 7: 88.07%
Epoch 8/20, Loss: 0.4742
Test Accuracy after epoch 8: 88.39%
Epoch 9/20, Loss: 0.4429
Test Accuracy after epoch 9: 88.84%
Epoch 10/20, Loss: 0.4851
Test Accuracy after epoch 10: 89.26%
Epoch 11/20, Loss: 0.4906
Test Accuracy after epoch 11: 89.42%
Epoch 12/20, Loss: 0.2972
Test Accuracy after epoch 12: 89.67%
Epoch 13/20, Loss: 0.3397
Test Accuracy after epoch 13: 89.86%
Epoch 14/20, Loss: 0.4192
Test Accuracy after epoch 14: 90.03%
Epoch 15/20, Loss: 0.2157
Test Accuracy after epoch 15: 90.22%
Epoch 16/20, Loss: 0.3814
Test Accuracy after epoch 16: 90.38%
Epoch 17/2