<a href="https://colab.research.google.com/github/code-1-mukul/Deep-Learning-Lab/blob/main/DL_LAB_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Setup and Load Dataset

In [1]:
import numpy as np
from tensorflow.keras.datasets import mnist

# Fetch the MNIST train/test splits (the loader downloads the archive on first use).
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Flatten every image into a 784-element row, cast to float32 and divide by 255
# (presumably mapping 8-bit pixel intensities into [0, 1]).
X_train = X_train.reshape(-1, 784).astype(np.float32) / 255.0
X_test = X_test.reshape(-1, 784).astype(np.float32) / 255.0

def one_hot(y, num_classes=10):
    """Encode integer class labels as rows of a one-hot matrix.

    Args:
        y: Array of integer labels. It is used as a column index alongside
            ``np.arange(y.shape[0])``, so it is expected to be 1-D with
            values valid as indices into ``num_classes`` columns.
        num_classes: Number of columns in the encoded output.

    Returns:
        Array of shape ``(y.shape[0], num_classes)`` in NumPy's default float
        dtype, all zeros except for a 1 at column ``y[i]`` of row ``i``.
    """
    n_samples = y.shape[0]
    encoded = np.zeros((n_samples, num_classes))
    sample_rows = np.arange(n_samples)
    # Pair each row index with its label to set exactly one entry per row.
    encoded[sample_rows, y] = 1
    return encoded

# One-hot targets with 10 columns, one per class predicted by the network.
Y_train = one_hot(y_train, num_classes=10)
Y_test = one_hot(y_test, num_classes=10)


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
11490434/11490434 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step


### Activation & Loss

In [2]:
def relu(z):
    """Rectified linear unit: element-wise ``max(0, z)``.

    Args:
        z: Array of pre-activations.

    Returns:
        Array shaped like ``z`` with every negative entry replaced by zero.
    """
    rectified = np.maximum(0, z)
    return rectified

def relu_deriv(z):
    """Derivative of ReLU evaluated at the pre-activations ``z``.

    Args:
        z: Array of pre-activations.

    Returns:
        float32 array shaped like ``z``: 1.0 where ``z > 0`` and 0.0 elsewhere
        (so the derivative at exactly zero is taken to be 0).
    """
    is_active = z > 0
    return is_active.astype(np.float32)

def softmax(z):
    """Row-wise softmax of a 2-D array of logits.

    Args:
        z: Array whose axis 1 holds the per-class logits of each sample.

    Returns:
        Array shaped like ``z`` whose rows are non-negative and sum to 1.
    """
    # Subtracting the row maximum leaves the result unchanged mathematically
    # but keeps np.exp from overflowing on large logits.
    row_max = np.max(z, axis=1, keepdims=True)
    unnormalized = np.exp(z - row_max)
    partition = np.sum(unnormalized, axis=1, keepdims=True)
    return unnormalized / partition

def cross_entropy(y_true, y_pred):
    """Mean categorical cross-entropy over a batch.

    Args:
        y_true: Target distribution per sample (one row per sample), e.g.
            one-hot labels.
        y_pred: Predicted probabilities with the same shape as ``y_true``.

    Returns:
        Scalar: the negative sum over axis 1 of ``y_true * log(y_pred + 1e-9)``,
        averaged over the rows.
    """
    # The small constant keeps log() finite when a predicted probability is 0.
    log_probs = np.log(y_pred + 1e-9)
    per_sample = np.sum(y_true * log_probs, axis=1)
    return -np.mean(per_sample)


### Multi-Layer Perceptron

In [3]:
class MLP:
    """Fully connected classifier with two ReLU hidden layers, trained by SGD.

    Architecture: ``in_dim -> h1 (ReLU) -> h2 (ReLU) -> out_dim (softmax)``.
    ``forward`` caches the intermediate activations on the instance and
    ``backward`` consumes them, so the two must always be called as a pair on
    the same batch.

    Attributes:
        lr: Learning rate of the plain gradient-descent update.
        W1, W2, W3: Weight matrices of shape ``(fan_in, fan_out)``.
        b1, b2, b3: Bias rows of shape ``(1, fan_out)``.
    """

    def __init__(self, in_dim=784, h1=128, h2=64, out_dim=10, lr=0.01,
                 weight_scale=0.01, seed=None):
        """Create the parameters of the network.

        Args:
            in_dim: Number of input features.
            h1: Width of the first hidden layer.
            h2: Width of the second hidden layer.
            out_dim: Number of output classes.
            lr: Learning rate used by ``backward``.
            weight_scale: Standard deviation of the Gaussian weight
                initialisation. Either a number applied to every layer, or the
                string ``"he"`` to use ``sqrt(2 / fan_in)`` per layer, which
                suits ReLU layers and avoids the slow start that a very small
                constant scale can cause.
            seed: If given, initialisation and the shuffling done by ``fit``
                draw from a private generator seeded with this value. If
                ``None``, NumPy's global random state is used, so an outer
                ``np.random.seed(...)`` still controls the run.

        Raises:
            ValueError: If ``weight_scale`` is a string other than ``"he"``.
        """
        self.lr = lr
        # The np.random module and RandomState expose the same randn /
        # permutation API, so either can serve as the source of randomness.
        self._rng = np.random if seed is None else np.random.RandomState(seed)
        self.W1 = self._init_weights(in_dim, h1, weight_scale)
        self.b1 = np.zeros((1, h1))
        self.W2 = self._init_weights(h1, h2, weight_scale)
        self.b2 = np.zeros((1, h2))
        self.W3 = self._init_weights(h2, out_dim, weight_scale)
        self.b3 = np.zeros((1, out_dim))

    def _init_weights(self, fan_in, fan_out, weight_scale):
        """Draw a ``(fan_in, fan_out)`` Gaussian weight matrix at the requested scale."""
        if isinstance(weight_scale, str):
            if weight_scale != "he":
                raise ValueError(
                    f"weight_scale must be a number or 'he', got {weight_scale!r}"
                )
            scale = np.sqrt(2.0 / fan_in)
        else:
            scale = weight_scale
        return scale * self._rng.randn(fan_in, fan_out)

    def forward(self, X):
        """Run a forward pass and cache every intermediate result.

        Args:
            X: Input batch of shape ``(B, in_dim)``.

        Returns:
            Class probabilities of shape ``(B, out_dim)``; also stored as
            ``self.Yhat``.
        """
        self.Z1 = X @ self.W1 + self.b1        # (B, h1)
        self.A1 = relu(self.Z1)
        self.Z2 = self.A1 @ self.W2 + self.b2  # (B, h2)
        self.A2 = relu(self.Z2)
        self.Z3 = self.A2 @ self.W3 + self.b3  # (B, out_dim)
        self.Yhat = softmax(self.Z3)
        return self.Yhat

    def backward(self, X, Y):
        """Backpropagate the mean cross-entropy loss and take one SGD step.

        Reads the activations cached by the most recent ``forward`` call, so
        ``forward(X)`` must have been called on this same batch immediately
        before.

        Args:
            X: The input batch that was passed to ``forward``.
            Y: Target distributions (e.g. one-hot rows) of shape ``(B, out_dim)``.
        """
        B = X.shape[0]

        # For softmax followed by cross-entropy, the gradient with respect to
        # the logits is (Yhat - Y); dividing by B gives the batch-mean gradient.
        dZ3 = (self.Yhat - Y) / B
        dW3 = self.A2.T @ dZ3
        db3 = np.sum(dZ3, axis=0, keepdims=True)

        dA2 = dZ3 @ self.W3.T
        dZ2 = dA2 * relu_deriv(self.Z2)
        dW2 = self.A1.T @ dZ2
        db2 = np.sum(dZ2, axis=0, keepdims=True)

        dA1 = dZ2 @ self.W2.T
        dZ1 = dA1 * relu_deriv(self.Z1)
        dW1 = X.T @ dZ1
        db1 = np.sum(dZ1, axis=0, keepdims=True)

        # All gradients above were computed with the pre-update weights, so the
        # parameters can now be updated in any order.
        self.W3 -= self.lr * dW3
        self.b3 -= self.lr * db3
        self.W2 -= self.lr * dW2
        self.b2 -= self.lr * db2
        self.W1 -= self.lr * dW1
        self.b1 -= self.lr * db1

    def fit(self, X, Y, epochs=10, batch_size=64):
        """Train with shuffled mini-batch SGD and print metrics after each epoch.

        Args:
            X: Training inputs of shape ``(n, in_dim)``.
            Y: Training targets of shape ``(n, out_dim)``.
            epochs: Number of passes over the data.
            batch_size: Samples per update; the last batch of an epoch may be
                smaller.

        Raises:
            ValueError: If ``X`` and ``Y`` have different numbers of rows, or
                ``batch_size`` is not positive.
        """
        n = X.shape[0]
        if Y.shape[0] != n:
            raise ValueError(
                f"X and Y must have the same number of rows, got {n} and {Y.shape[0]}"
            )
        if batch_size <= 0:
            raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

        for ep in range(epochs):
            # Reshuffle every epoch so the batches differ between passes.
            order = self._rng.permutation(n)
            Xs, Ys = X[order], Y[order]
            for start in range(0, n, batch_size):
                xb = Xs[start:start + batch_size]
                yb = Ys[start:start + batch_size]
                self.forward(xb)
                self.backward(xb, yb)

            # Metrics are computed on the full training set after the epoch's updates.
            preds = self.forward(X)
            loss = cross_entropy(Y, preds)
            acc = np.mean(np.argmax(preds, 1) == np.argmax(Y, 1))
            print(f"Epoch {ep+1:02d} | loss={loss:.4f} | acc={acc:.4f}")

    def predict(self, X):
        """Return the index of the most probable class for each row of ``X``.

        Runs ``forward``, so the cached activations are overwritten.
        """
        return np.argmax(self.forward(X), axis=1)


### Train & Evaluate

In [4]:
# Seed NumPy's global generator before building the model: weight
# initialisation and the per-epoch shuffling both draw from it, so without a
# seed the metrics printed below differ on every Restart & Run All.
SEED = 42
np.random.seed(SEED)

mlp = MLP(lr=0.01)
mlp.fit(X_train, Y_train, epochs=15, batch_size=128)

# Evaluate on the held-out split against the integer labels (not the one-hot
# targets), since predict() returns class indices.
test_preds = mlp.predict(X_test)
test_acc = np.mean(test_preds == y_test)
print("Test accuracy:", test_acc)


Epoch 01 | loss=2.3014 | acc=0.1124
Epoch 02 | loss=2.3006 | acc=0.1124
Epoch 03 | loss=2.2997 | acc=0.1124
Epoch 04 | loss=2.2977 | acc=0.1124
Epoch 05 | loss=2.2907 | acc=0.1380
Epoch 06 | loss=2.2368 | acc=0.2306
Epoch 07 | loss=2.0075 | acc=0.3186
Epoch 08 | loss=1.3948 | acc=0.5189
Epoch 09 | loss=1.0589 | acc=0.6544
Epoch 10 | loss=0.8133 | acc=0.7395
Epoch 11 | loss=0.7025 | acc=0.7732
Epoch 12 | loss=0.6410 | acc=0.8016
Epoch 13 | loss=0.5955 | acc=0.8217
Epoch 14 | loss=0.5546 | acc=0.8370
Epoch 15 | loss=0.5112 | acc=0.8535
Test accuracy: 0.8545
