# 1. Preparing Our Data

In [9]:
import numpy as np
import struct, gzip, os

def _open_maybe_gz(path):
    """Open *path* for binary reading, decompressing it when it ends in '.gz'.

    Args:
        path: Location of the file as a ``str``, ``bytes`` or ``os.PathLike``
            object (e.g. ``pathlib.Path``).

    Returns:
        A binary file object opened in ``'rb'`` mode; a gzip reader when the
        file name ends with ``'.gz'``, a regular file object otherwise.
    """
    # os.fsdecode normalises str / bytes / PathLike to str so the suffix test
    # works for every path flavour; the original object is what gets opened.
    if os.fsdecode(path).endswith('.gz'):
        return gzip.open(path, 'rb')
    return open(path, 'rb')

def load_images(path):
    """Load an IDX3 image file as flattened, normalised float32 rows.

    Args:
        path: Path to the image file; files ending in '.gz' are decompressed
            by ``_open_maybe_gz``.

    Returns:
        A float32 array of shape ``(num, rows * cols)`` with pixel values
        scaled from 0..255 down to 0..1.

    Raises:
        ValueError: If the header magic number is not 2051 (IDX3 unsigned-byte
            images) or the file holds fewer pixel bytes than the header
            declares.
        struct.error: If the file is shorter than the 16-byte header.
    """
    with _open_maybe_gz(path) as f:
        # Header: four big-endian uint32 values.
        magic, num, rows, cols = struct.unpack('>IIII', f.read(16))
        if magic != 2051:
            raise ValueError(
                f"{path!r} is not an IDX3 image file (magic number {magic}, expected 2051)"
            )
        expected = num * rows * cols
        buf = f.read(expected)

    # A short read means the file is truncated; fail with a clear message
    # rather than letting reshape raise a cryptic size error.
    if len(buf) != expected:
        raise ValueError(
            f"{path!r} is truncated: expected {expected} pixel bytes, got {len(buf)}"
        )

    data = np.frombuffer(buf, dtype=np.uint8).astype(np.float32)
    return data.reshape(num, rows * cols) / 255.0

def load_labels(path):
    """Load an IDX1 label file as a 1-D int64 array.

    Args:
        path: Path to the label file; files ending in '.gz' are decompressed
            by ``_open_maybe_gz``.

    Returns:
        An int64 array of shape ``(num,)`` holding one label per item.

    Raises:
        ValueError: If the header magic number is not 2049 (IDX1 unsigned-byte
            labels) or the file holds fewer labels than the header declares.
        struct.error: If the file is shorter than the 8-byte header.
    """
    with _open_maybe_gz(path) as f:
        # Header: two big-endian uint32 values.
        magic, num = struct.unpack('>II', f.read(8))
        if magic != 2049:
            raise ValueError(
                f"{path!r} is not an IDX1 label file (magic number {magic}, expected 2049)"
            )
        buf = f.read(num)

    # Without this check a truncated file would silently yield fewer labels
    # than the header promises.
    if len(buf) != num:
        raise ValueError(
            f"{path!r} is truncated: expected {num} labels, got {len(buf)}"
        )

    return np.frombuffer(buf, dtype=np.uint8).astype(np.int64)


# Read the training and test splits (images, then labels) from the
# uncompressed IDX files under data/.
x_train, y_train = (
    load_images("data/train-images.idx3-ubyte"),
    load_labels("data/train-labels.idx1-ubyte"),
)
x_test, y_test = (
    load_images("data/t10k-images.idx3-ubyte"),
    load_labels("data/t10k-labels.idx1-ubyte"),
)


In [10]:
# Sanity check: show the shapes of the training images and training labels.
print(*(arr.shape for arr in (x_train, y_train)))

(60000, 784) (60000,)


### One-hot encoding

In [11]:
num_classes = 10
# Indexing the identity matrix by label selects row k for class k, which
# yields one one-hot row per sample.
y_train_oh, y_test_oh = (
    np.eye(num_classes)[labels] for labels in (y_train, y_test)
)

# 2. Utility functions

### ReLU function

In [None]:
def relu(x):
    """Return the element-wise maximum of 0 and *x* (rectified linear unit)."""
    rectified = np.maximum(0, x)
    return rectified
def relu_deriv(x):
    """Return the ReLU derivative of array *x*: 1.0 where x > 0, else 0.0 (float32)."""
    is_positive = x > 0
    return is_positive.astype(np.float32)

### Softmax function (for probability distribution)

In [None]:
def softmax(z):
    """Return the row-wise softmax of *z*.

    Each row is shifted by its own maximum before exponentiation so large
    scores cannot overflow; the shift does not change the result. The
    normalisation runs along axis 1, so every output row sums to 1.
    """
    row_max = np.max(z, axis=1, keepdims=True)
    exp_shifted = np.exp(z - row_max)
    row_total = np.sum(exp_shifted, axis=1, keepdims=True)
    return exp_shifted / row_total

### Cross entropy loss

In [1]:
def cross_entropy_loss(y_true, y_pred):
    """Return the cross-entropy between *y_true* and *y_pred*, averaged over rows.

    The sum of ``y_true * log(y_pred)`` over all entries is negated and
    divided by the number of rows in *y_true*. A small constant (1e-12) is
    added to the predictions so that log(0) is never evaluated.
    """
    batch_size = y_true.shape[0]
    log_probs = np.log(y_pred + 1e-12)
    weighted_total = np.sum(y_true * log_probs)
    return -weighted_total / batch_size

# 3. Main Class

In [None]:
class NeuralNetwork:
    # constructor
    def __init__(self, layers , learningRate = 0.1 , seed =42):
        np.random.seed(seed)
        self.layers = layers
        self.learningRate = learningRate
        self.L = len(layers) - 1
        self.weights =[]
        self.bias =[]

        for i in range(self.L):
            input_dim = layers[i]
            output_dim = layers[i+1]

            weights = np.random.randn(input_dim,output_dim) * np.sqrt(2.0 / max(1, input_dim))
            bias = np.zeroes((1,output_dim) ,dtype = np.float32)

            sef.weights.append(weights.astype(np.float32))
            self.bias.append(bias)

    def feed_forward(self,X):
        A = X
        self.As = [A]
        self.Zs = []
        for i in range(self.L):
            Z = A @ self.weights[i] + self.bias[i]
            self.Zs.append(Z)
            if i == self.L - 1:
                A = softmax(Z)
            else:
                A = relu(Z)
            self.As.append(A)
        return A

    def back_propagation(self , X , y_true):
        m = X.shape[0]
        y_pred = self.As[-1]
        dZ = (y_pred - Y_true) / m
        
        for i in reversed(range(self.L)):
            A_prev = self.As[i]
            dw = A_prev.T @ dZ
            db = np.sum(dZ , axis=0 , keepdims=True)

            self.w[i] -= self.learningRate * dw
            self.b[i] -= self.learningRate * db

            if i > 0 :
                dA_prev = dZ @ self.weights[i].T
                dZ = dA_prev * relu_deriv(self.Zs[i-1])