Importing all necessary libraries

In [151]:
import numpy as np
from tensorflow.keras.datasets import mnist
import matplotlib.pyplot as plt

Loading and preprocessing the dataset

In [152]:
def load_data():
    (X_train, Y_train), (X_test, Y_test) = mnist.load_data()

    X_train = X_train.reshape(X_train.shape[0], -1).T / 255.0
    X_test = X_test.reshape(X_test.shape[0], -1).T / 255.0
    
    Y_train = one_hot(Y_train)
    Y_test = one_hot(Y_test)

    return X_train, Y_train, X_test, Y_test


One-hot encoding labels

In [153]:
def one_hot(labels, num_classes=10):
    one_hot = np.zeros((num_classes, labels.size))
    one_hot[labels, np.arange(labels.size)] = 1
    return one_hot


Initializing weights and biases

In [154]:
def init(a=784, b=128, c=10):
    np.random.seed(42)
    w1 = np.random.randn(b, a) * 0.01
    b1 = np.zeros((b, 1))
    w2 = np.random.randn(c, b) * 0.01
    b2 = np.zeros((c, 1))
    return w1, b1, w2, b2


Defining the activation function

In [155]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def d_sigmoid(x):
    return x * (1 - x)


Forward propagation

In [156]:
def forprop(X, w1, b1, w2, b2):
    hi = np.dot(w1, X) + b1
    ho = sigmoid(hi)
    oi = np.dot(w2, ho) + b2
    oo = sigmoid(oi)
    return hi, ho, oi, oo

Backward propagation

In [157]:
def backprop(X, Y, hi, ho, oi, oo, w1, w2, b1, b2, learn=0.01):
    m = X.shape[1] 

    eo = oo - Y
    dw2 = (1 / m) * np.dot(eo, ho.T)
    db2 = (1 / m) * np.sum(eo, axis=1, keepdims=True)

    eh = np.dot(w2.T, eo)
    dh = eh * d_sigmoid(ho)
    dw1 = (1 / m) * np.dot(dh, X.T)
    db1 = (1 / m) * np.sum(dh, axis=1, keepdims=True)

    w1 -= learn * dw1
    b1 -= learn * db1
    w2 -= learn * dw2
    b2 -= learn * db2

    return w1, b1, w2, b2


Training

In [158]:
def train(X_train, Y_train, w1, b1, w2, b2, epochs=10, batch_size=64, learn=0.01):
    for epoch in range(epochs):
        combo = np.random.permutation(X_train.shape[1])
        X_shuffled = X_train[:, combo]
        Y_shuffled = Y_train[:, combo]

        for i in range(0, X_train.shape[1], batch_size):
            X_batch = X_shuffled[:, i:i + batch_size]
            Y_batch = Y_shuffled[:, i:i + batch_size]

            hi, ho, oi, oo = forprop(X_batch, w1, b1, w2, b2)
            w1, b1, w2, b2 = backprop(X_batch, Y_batch, hi, ho, oi, oo, w1, w2, b1, b2, learn)

        if epoch % 10 == 0: 
            hi, ho, oi, oo = forprop(X_train, w1, b1, w2, b2)
            loss = -np.mean(Y_train * np.log(oo + 1e-8))  # Cross-entropy loss
            print(f"Epoch {epoch}, Loss: {loss}")

    return w1, b1, w2, b2


Evaluating the model

In [159]:
def acc(X_test, Y_test, w1, b1, w2, b2):
    hi, ho, oi, oo = forprop(X_test, w1, b1, w2, b2)
    predictions = np.argmax(oo, axis=0)
    labels = np.argmax(Y_test, axis=0)
    accuracy = np.mean(predictions == labels) * 100
    print(f"Test Accuracy: {accuracy}%")
    
    return accuracy


Visualizing incorrect predictions

In [160]:
def incorrect(X, Y, w1, b1, w2, b2):
    hi, ho, oi, oo = forprop(X, w1, b1, w2, b2)
    predictions = np.argmax(oo, axis=0)
    labels = np.argmax(Y, axis=0)
    misclassified = np.where(predictions != labels)[0]

    plt.figure(figsize=(10, 5))
    for i, idx in enumerate(misclassified[:10]):
        plt.subplot(2, 5, i + 1)
        plt.imshow(X[:, idx].reshape(28, 28), cmap='gray')
        plt.title(f"Pred: {predictions[idx]}, True: {labels[idx]}")
        plt.axis('off')
    plt.show()


Visualizing correct predictions

In [161]:
def correct(X, Y, w1, b1, w2, b2):
    hi, ho, oi, oo = forprop(X, w1, b1, w2, b2)
    predictions = np.argmax(oo, axis=0)
    labels = np.argmax(Y, axis=0)
    classified = np.where(predictions == labels)[0]

    plt.figure(figsize=(10, 5))
    for i, idx in enumerate(classified[:10]):
        plt.subplot(2, 5, i + 1)
        plt.imshow(X[:, idx].reshape(28, 28), cmap='gray')
        plt.title(f"Pred: {predictions[idx]}, True: {labels[idx]}")
        plt.axis('off')
    plt.show()


Loading and executing

In [162]:
a=784
b=128 
c=10
epochs=500
batch_size=64
learn=0.1

In [163]:
X_train, Y_train, X_test, Y_test = load_data()
w1, b1, w2, b2 = init()
w1, b1, w2, b2 = train(X_train, Y_train, w1, b1, w2, b2, epochs=500, batch_size=64, learn=0.1)
acc(X_test, Y_test, w1, b1, w2, b2)
correct(X_test, Y_test, w1, b1, w2, b2)
incorrect(X_test, Y_test, w1, b1, w2, b2)


Epoch 0, Loss: 0.06336037318226236
Epoch 10, Loss: 0.021301990032514977
Epoch 20, Loss: 0.014404007298159617


KeyboardInterrupt: 