<a href="https://colab.research.google.com/github/bilalzardoa/machine-learning/blob/main/mnist_scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import keras
import numpy as np

# Must run after the numpy import: on a fresh kernel `np` is not defined yet.
# suppress=True prints small floats in fixed-point instead of scientific notation.
np.set_printoptions(suppress=True)

In [None]:
# Load the MNIST train and test splits (images in x_*, digit labels in y_*) through Keras.
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

In [None]:
# Sanity check: display the shapes of the image and label arrays for both splits.
x_train.shape , y_train.shape , x_test.shape , y_test.shape

((60000, 28, 28), (60000,), (10000, 28, 28), (10000,))

In [None]:
# Inspect the raw pixel grid of the first training image.
x_train[0] # pixel values range from 0 (black) to 255 (white)

In [None]:
# Label (digit class) of the first training image.
y_train[0]

np.uint8(5)

In [None]:
# Flatten every 28x28 image into a 784-vector and transpose so that each
# column is one sample: (60000, 28, 28) -> (784, 60000).
# The ndim guard makes the cell safe to re-run: once x_train is already 2-D,
# reshaping it again would fail because the element count no longer matches.
if x_train.ndim == 3:
    x_train = x_train.reshape(x_train.shape[0],28*28).T
y_train = y_train.flatten()  # Shape becomes (60000,)
print(x_train.shape,y_train.shape)

(784, 60000) (60000,)


In [None]:
# Each input column is a flattened image of 28*28 = 784 pixels.
# Forward prop computes w.T @ x + b, so w1 is (inputs, hidden) and w2 is
# (hidden, outputs); each bias is a column with one entry per unit of its layer.
w1,w2 = np.random.randn(784,32) , np.random.randn(32,10)
# Bias sizes are derived from the weights so they cannot drift apart.
# b1 used to be (10, 1), which cannot broadcast against the (32, m) hidden layer.
b1 = np.zeros((w1.shape[1], 1))  # (32, 1)
b2 = np.zeros((w2.shape[1], 1))  # (10, 1)

print(w1.shape,w2.shape)

(784, 32) (32, 10)


In [None]:
def ReLu(x):
  """Rectified linear unit: element-wise maximum of 0 and ``x``."""
  floor = 0
  return np.maximum(floor, x)

def softmax(x):
    """Softmax along axis 0.

    Every column of ``x`` is treated as one vector of scores and is mapped to
    non-negative values that sum to 1.
    """
    # Shifting by the per-column maximum does not change the result but keeps
    # np.exp from overflowing on large scores.
    column_peak = np.max(x, axis=0, keepdims=True)
    weights = np.exp(x - column_peak)
    totals = np.sum(weights, axis=0, keepdims=True)
    return weights / totals



# Forward pass; the last returned value (A2) is the prediction.
def forward_propagation(x,w1,b1,w2,b2):
  """Run the two-layer network on ``x``.

  ``x`` holds one sample per column. ``w1`` and ``w2`` are stored as
  (inputs, outputs) and are transposed before multiplying; ``b1`` and ``b2``
  are added to the result of each product.

  Returns the tuple ``(Z1, A1, Z2, A2)``: hidden pre-activation, hidden ReLU
  activation, output pre-activation, and the softmax output.
  """
  hidden_pre = np.dot(w1.T, x) + b1
  hidden_act = ReLu(hidden_pre)
  logits = np.dot(w2.T, hidden_act) + b2
  probs = softmax(logits)
  return hidden_pre, hidden_act, logits, probs

def one_hot(y, num_classes=None):
  """One-hot encode integer class labels.

  Args:
    y: array-like of non-negative integer labels; it is flattened, so both
      shape (m,) and shape (m, 1) are accepted.
    num_classes: number of rows in the result. When None (the default) it is
      inferred as ``max(y) + 1``. Pass it explicitly whenever ``y`` may not
      contain the highest class (e.g. a mini-batch), otherwise the result has
      too few rows.

  Returns:
    Float array of shape (num_classes, m) with a single 1 in every column.

  Raises:
    ValueError: if ``y`` is empty and ``num_classes`` is not given.
    IndexError: if a label is >= ``num_classes``.
  """
  labels = np.asarray(y).ravel()
  if num_classes is None:
    # int() avoids wrap-around when the labels use a small unsigned dtype.
    num_classes = int(labels.max()) + 1
  one_hot_y = np.zeros((labels.size, num_classes))
  one_hot_y[np.arange(labels.size), labels] = 1
  return one_hot_y.T

def deriv_ReLu(Z):
  """Derivative of ReLU as a mask: True where ``Z`` is strictly positive."""
  is_active = Z > 0
  return is_active

def backwards_propagation(Z1, A1, Z2, A2, w2, x, y):
    """Compute the gradients of the two-layer network for one batch.

    Shapes below use m = number of samples, with one sample per column.

    Args:
        Z1: hidden pre-activation, (hidden, m).
        A1: hidden activation, (hidden, m).
        Z2: output pre-activation; accepted so the outputs of
            forward_propagation can be passed straight through, but not used.
        A2: softmax output, (classes, m).
        w2: output-layer weights, (hidden, classes).
        x: input batch, (features, m).
        y: integer labels with m entries.

    Returns:
        ``(dw1, db1, dw2, db2)`` with shapes (features, hidden), (hidden, 1),
        (hidden, classes) and (classes, 1).
    """
    m = y.size

    # one_hot() sizes its output from the labels it sees, so a batch without
    # the highest class would produce fewer rows than A2 (and a batch of only
    # zeros would silently broadcast). Placing the encoding inside a zero
    # matrix shaped like A2 keeps the subtraction well-defined; labels beyond
    # A2's class count still fail loudly on the assignment.
    encoded = one_hot(y)
    targets = np.zeros(A2.shape)
    targets[:encoded.shape[0], :] = encoded

    dZ2 = A2 - targets                                   # (classes, m)
    dw2 = (1 / m) * np.dot(A1, dZ2.T)                    # (hidden, m) · (m, classes) = (hidden, classes)
    db2 = (1 / m) * np.sum(dZ2, axis=1, keepdims=True)   # (classes, 1)

    dZ1 = np.dot(w2, dZ2) * deriv_ReLu(Z1)               # (hidden, classes) · (classes, m) = (hidden, m)
    dw1 = (1 / m) * np.dot(x, dZ1.T)                     # (features, m) · (m, hidden) = (features, hidden)
    db1 = (1 / m) * np.sum(dZ1, axis=1, keepdims=True)   # (hidden, 1)

    return dw1, db1, dw2, db2

def update_params(w1, b1, w2, b2, dw1, db1, dw2, db2, lr=0.01):
    """Apply one gradient-descent step and return the updated parameters.

    Each parameter is moved against its gradient by ``lr``. The inputs are not
    modified: new arrays are returned, so a caller that kept a reference to
    the previous parameters (e.g. to compare before/after) still sees the old
    values.

    Returns:
        ``(w1, b1, w2, b2)`` after the step.
    """
    w1 = w1 - lr * dw1
    b1 = b1 - lr * db1
    w2 = w2 - lr * dw2
    b2 = b2 - lr * db2
    return w1, b1, w2, b2

def get_prediction(A2):
  """Return, for every column of ``A2``, the row index of its largest entry."""
  predicted_rows = np.argmax(A2, axis=0)
  return predicted_rows

def get_accuracy(predictions, labels):
    """Fraction of positions where ``predictions`` equals ``labels``."""
    is_correct = predictions == labels
    return np.mean(is_correct)

def gradient_descent(x, y, epochs, lr=0.001, hidden_units=128, num_classes=10,
                     log_every=1, seed=None):
    """Train the two-layer network with full-batch gradient descent.

    Args:
        x: inputs with one sample per column, shape (features, m). The input
            layer is sized from ``x.shape[0]`` instead of a hard-coded 784.
        y: integer labels with m entries.
        epochs: number of full passes (one parameter update per pass).
        lr: learning rate handed to update_params.
        hidden_units: width of the hidden layer.
        num_classes: number of output units.
        log_every: print the training accuracy every ``log_every`` epochs;
            0 or None disables printing.
        seed: when given, the weights are drawn from a generator seeded with
            it so runs are repeatable; when None the global NumPy RNG is used.

    Returns:
        The trained parameters ``(w1, b1, w2, b2)``.
    """
    n_features = x.shape[0]

    # Small random weights break symmetry between units; biases start at zero.
    if seed is None:
        w1 = np.random.randn(n_features, hidden_units) * 0.01
        w2 = np.random.randn(hidden_units, num_classes) * 0.01
    else:
        rng = np.random.default_rng(seed)
        w1 = rng.standard_normal((n_features, hidden_units)) * 0.01
        w2 = rng.standard_normal((hidden_units, num_classes)) * 0.01
    b1 = np.zeros((hidden_units, 1))
    b2 = np.zeros((num_classes, 1))

    for i in range(epochs):
        Z1, A1, Z2, A2 = forward_propagation(x, w1, b1, w2, b2)
        dw1, db1, dw2, db2 = backwards_propagation(Z1, A1, Z2, A2, w2, x, y)
        w1, b1, w2, b2 = update_params(w1, b1, w2, b2, dw1, db1, dw2, db2, lr)

        if log_every and i % log_every == 0:
            # A2 comes from the forward pass above, so this is the accuracy
            # of the parameters as they were before this epoch's update.
            predictions = get_prediction(A2)
            acc = get_accuracy(predictions, y)
            print(f"Epoch {i}: Accuracy = {acc:.4f}")

    return w1, b1, w2, b2


In [None]:
# Train for 200 full-batch epochs and keep the learned parameters; leaving the
# call as a bare expression would discard them and echo the raw weight arrays.
w1_trained, b1_trained, w2_trained, b2_trained = gradient_descent(x_train,y_train,200)

Epoch 0: Accuracy = 0.0734
Epoch 1: Accuracy = 0.2277
Epoch 2: Accuracy = 0.2824
Epoch 3: Accuracy = 0.4320
Epoch 4: Accuracy = 0.5338
Epoch 5: Accuracy = 0.5769
Epoch 6: Accuracy = 0.6318
Epoch 7: Accuracy = 0.6627
Epoch 8: Accuracy = 0.6911
Epoch 9: Accuracy = 0.7121
Epoch 10: Accuracy = 0.7298
Epoch 11: Accuracy = 0.7441
Epoch 12: Accuracy = 0.7543
Epoch 13: Accuracy = 0.7672
Epoch 14: Accuracy = 0.7738
Epoch 15: Accuracy = 0.7835
Epoch 16: Accuracy = 0.7870
Epoch 17: Accuracy = 0.7956
Epoch 18: Accuracy = 0.7980
Epoch 19: Accuracy = 0.8061
Epoch 20: Accuracy = 0.8076
Epoch 21: Accuracy = 0.8154
Epoch 22: Accuracy = 0.8157
Epoch 23: Accuracy = 0.8232
Epoch 24: Accuracy = 0.8230
Epoch 25: Accuracy = 0.8299
Epoch 26: Accuracy = 0.8302
Epoch 27: Accuracy = 0.8359
Epoch 28: Accuracy = 0.8362
Epoch 29: Accuracy = 0.8416
Epoch 30: Accuracy = 0.8418
Epoch 31: Accuracy = 0.8461
Epoch 32: Accuracy = 0.8472
Epoch 33: Accuracy = 0.8508
Epoch 34: Accuracy = 0.8515
Epoch 35: Accuracy = 0.8551
Ep

(array([[-0.00976103, -0.00037682, -0.00034527, ..., -0.02060305,
         -0.00859603, -0.00039963],
        [ 0.00235105,  0.00308936,  0.0010118 , ..., -0.00068948,
         -0.01235343, -0.00536601],
        [ 0.00305641,  0.01501075, -0.00043023, ..., -0.00728732,
          0.021372  , -0.01968051],
        ...,
        [-0.01907529,  0.00652858,  0.00550175, ...,  0.00650749,
          0.00402604, -0.00761575],
        [ 0.02463708,  0.00939878, -0.00163983, ..., -0.00889108,
         -0.02751385,  0.01726892],
        [-0.01230927,  0.00216512, -0.00081128, ...,  0.00675376,
         -0.00110389,  0.00619815]]),
 array([[ 0.00000509],
        [-0.00002973],
        [-0.00000585],
        [ 0.00000312],
        [-0.00000305],
        [-0.00000535],
        [-0.00000567],
        [ 0.00000187],
        [-0.00000361],
        [-0.00000644],
        [-0.00001029],
        [ 0.00000261],
        [ 0.00000742],
        [-0.00000023],
        [ 0.00000162],
        [ 0.00002438],
     