In [95]:
import numpy as np
import tensorflow as tf

In [96]:
# Fetch the MNIST train/test splits through the Keras dataset helper; each
# split unpacks into an (images, labels) pair.
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()


In [97]:
def hot_value(n, num_classes=10):
    """Return the one-hot encoding of class index ``n``.

    Parameters
    ----------
    n : int
        Class index to encode.
    num_classes : int, optional
        Length of the returned vector. Defaults to 10.

    Returns
    -------
    numpy.ndarray
        Integer vector of length ``num_classes`` holding 1 at position ``n``
        and 0 everywhere else. An ``n`` outside ``[0, num_classes)`` yields
        an all-zero vector.
    """
    # Compare every candidate index against n in one vectorised step, then
    # turn the boolean mask into 0/1 integers.
    return (np.arange(num_classes) == n).astype(int)

In [106]:
# One-hot encode the labels in a single indexing step: row k of the 10x10
# identity matrix is the encoding of class k, so indexing it with a label
# array yields one encoded row per sample.
Y = np.eye(10, dtype=int)[y_train]
YT = np.eye(10, dtype=int)[y_test]

# Flatten every image into one feature row (-1 lets NumPy infer the row
# length) and divide the pixel values by 255.0.
X = X_train.reshape(len(X_train), -1) / 255.0
XT = X_test.reshape(len(X_test), -1) / 255.0
print(X[0].shape)

(784,)


In [99]:
def softmax(z):
    """Softmax of ``z`` taken along axis 1.

    The per-row maximum is subtracted before exponentiating so that the
    largest exponent in every row is exactly zero; the result is then
    normalised by the row sums.
    """
    row_peak = np.max(z, axis=1, keepdims=True)
    shifted_exp = np.exp(z - row_peak)
    row_total = np.sum(shifted_exp, axis=1, keepdims=True)
    return shifted_exp / row_total

def cross_ent(y_true, y_pred):
    """Cross-entropy between ``y_true`` and ``y_pred``, averaged over rows.

    The summed ``y_true * log(y_pred)`` terms are negated and divided by the
    number of rows in ``y_true``. A constant 1e-9 is added to ``y_pred``
    before the logarithm so a predicted value of exactly zero does not
    produce ``log(0)``.
    """
    n_rows = y_true.shape[0]
    log_pred = np.log(y_pred + 1e-9)
    weighted_total = np.sum(y_true * log_pred)
    return -weighted_total / n_rows

def relu(z):
    """Element-wise ReLU with the output capped at 1e6.

    Values below zero become 0 and values above 1e6 become 1e6; everything
    in between passes through unchanged.
    """
    upper_bound = 1e6  # ceiling that keeps extreme activations in check
    capped = np.minimum(z, upper_bound)
    return np.maximum(0, capped)

def relu_derivative(z, cap=1e6):
    """Element-wise derivative of the capped ReLU defined by ``relu``.

    ``relu`` clips its output at 1e6, so the function is flat both for
    ``z <= 0`` and once the cap is reached. The derivative is therefore 1
    only strictly between 0 and ``cap`` and 0 everywhere else.

    Parameters
    ----------
    z : numpy.ndarray
        Pre-activation values.
    cap : float, optional
        Upper clipping bound; the default matches the 1e6 used by ``relu``.

    Returns
    -------
    numpy.ndarray
        Float array shaped like ``z`` containing 0.0 or 1.0.
    """
    # Gradient flows only where the forward pass was neither zeroed nor capped.
    active = (z > 0) & (z < cap)
    return active.astype(float)

In [132]:
# Seed NumPy's global generator so the random draws below are repeatable.
np.random.seed(42)

# Weights: standard-normal draws scaled by sqrt(2 / number of input units).
# The draw order (W1, W2, W3) fixes which random numbers each matrix receives.
W1 = np.sqrt(2.0 / 784) * np.random.randn(784, 10)
W2 = np.sqrt(2.0 / 10) * np.random.randn(10, 4)
W3 = np.sqrt(2.0 / 4) * np.random.randn(4, 10)

# Biases start at zero, one row vector per layer.
b1, b2, b3 = (np.zeros((1, width)) for width in (10, 4, 10))

# Step size and number of iterations consumed by the training loop.
lr = 0.1
epochs = 10000

In [133]:
def forward(X, W1, b1, W2, b2, W3, b3):
    """Run the three-layer network on ``X``.

    Layers one and two apply ``relu`` to their affine output; layer three
    applies ``softmax``.

    Returns
    -------
    tuple
        ``(A3, cache)`` where ``A3`` is the softmax output and ``cache`` is
        the tuple ``(Z1, A1, Z2, A2, Z3, A3)`` of every pre-activation and
        activation, in layer order.
    """
    layers = (
        (W1, b1, relu),
        (W2, b2, relu),
        (W3, b3, softmax),
    )

    signal = X
    trace = []
    for weights, bias, activate in layers:
        pre_activation = signal @ weights + bias
        signal = activate(pre_activation)
        trace.extend((pre_activation, signal))

    return signal, tuple(trace)


In [134]:
def backward(X, y, cache, W1, W2, W3):
    """Back-propagate through the three-layer network.

    Parameters
    ----------
    X, y : arrays
        Network input and the target array that is subtracted from the
        network output.
    cache : tuple
        ``(Z1, A1, Z2, A2, Z3, A3)`` as produced by ``forward``.
    W1, W2, W3 : arrays
        Layer weights. ``W1`` is accepted for a uniform signature but is not
        read here.

    Returns
    -------
    tuple
        ``(dW1, db1, dW2, db2, dW3, db3)``, each averaged over the number of
        rows in ``X``.
    """
    Z1, A1, Z2, A2, _, A3 = cache
    n_rows = X.shape[0]

    def layer_grads(layer_input, delta):
        # Weight and bias gradients of one layer, averaged over the rows.
        weight_grad = layer_input.T @ delta / n_rows
        bias_grad = np.sum(delta, axis=0, keepdims=True) / n_rows
        return weight_grad, bias_grad

    # Output layer: the error signal is simply prediction minus target.
    delta3 = A3 - y
    dW3, db3 = layer_grads(A2, delta3)

    # Second hidden layer: push the error back through W3 and the ReLU.
    delta2 = (delta3 @ W3.T) * relu_derivative(Z2)
    dW2, db2 = layer_grads(A1, delta2)

    # First hidden layer: same step through W2.
    delta1 = (delta2 @ W2.T) * relu_derivative(Z1)
    dW1, db1 = layer_grads(X, delta1)

    return dW1, db1, dW2, db2, dW3, db3


In [135]:
# Full-batch gradient descent: every iteration uses all of X / Y.
for epoch in range(epochs):
    # Forward pass, then the loss value that is reported below.
    y_pred, cache = forward(X, W1, b1, W2, b2, W3, b3)
    loss = cross_ent(Y, y_pred)

    # Gradients of every parameter for the current predictions.
    dW1, db1, dW2, db2, dW3, db3 = backward(X, Y, cache, W1, W2, W3)

    # In-place parameter update: weights first, then biases.
    W1 -= lr * dW1
    W2 -= lr * dW2
    W3 -= lr * dW3
    b1 -= lr * db1
    b2 -= lr * db2
    b3 -= lr * db3

    # Report progress on every 100th iteration (including iteration 0).
    if not epoch % 100:
        print(f"Epoch {epoch}, Loss: {loss:.4f}")


  Z1 = X @ W1 + b1
  Z1 = X @ W1 + b1
  Z1 = X @ W1 + b1
  Z2 = A1 @ W2 + b2
  Z2 = A1 @ W2 + b2
  Z2 = A1 @ W2 + b2
  Z3 = A2 @ W3 + b3
  Z3 = A2 @ W3 + b3
  Z3 = A2 @ W3 + b3
  dW3 = A2.T @ dZ3 / m
  dW3 = A2.T @ dZ3 / m
  dW3 = A2.T @ dZ3 / m
  dA2 = dZ3 @ W3.T
  dA2 = dZ3 @ W3.T
  dA2 = dZ3 @ W3.T
  dW2 = A1.T @ dZ2 / m
  dW2 = A1.T @ dZ2 / m
  dW2 = A1.T @ dZ2 / m
  dA1 = dZ2 @ W2.T
  dA1 = dZ2 @ W2.T
  dA1 = dZ2 @ W2.T
  dW1 = X.T @ dZ1 / m
  dW1 = X.T @ dZ1 / m
  dW1 = X.T @ dZ1 / m


Epoch 0, Loss: 2.4085
Epoch 100, Loss: 1.4177
Epoch 200, Loss: 1.0429
Epoch 300, Loss: 0.8812
Epoch 400, Loss: 0.7500
Epoch 500, Loss: 0.6587
Epoch 600, Loss: 0.5972
Epoch 700, Loss: 0.5540
Epoch 800, Loss: 0.5222
Epoch 900, Loss: 0.4966
Epoch 1000, Loss: 0.4743
Epoch 1100, Loss: 0.4546
Epoch 1200, Loss: 0.4362
Epoch 1300, Loss: 0.4180
Epoch 1400, Loss: 0.4027
Epoch 1500, Loss: 0.3903
Epoch 1600, Loss: 0.3799
Epoch 1700, Loss: 0.3710
Epoch 1800, Loss: 0.3633
Epoch 1900, Loss: 0.3565
Epoch 2000, Loss: 0.3505
Epoch 2100, Loss: 0.3450
Epoch 2200, Loss: 0.3401
Epoch 2300, Loss: 0.3355
Epoch 2400, Loss: 0.3313
Epoch 2500, Loss: 0.3273
Epoch 2600, Loss: 0.3235
Epoch 2700, Loss: 0.3200
Epoch 2800, Loss: 0.3167
Epoch 2900, Loss: 0.3135
Epoch 3000, Loss: 0.3105
Epoch 3100, Loss: 0.3077
Epoch 3200, Loss: 0.3050
Epoch 3300, Loss: 0.3024
Epoch 3400, Loss: 0.2999
Epoch 3500, Loss: 0.2976
Epoch 3600, Loss: 0.2955
Epoch 3700, Loss: 0.2934
Epoch 3800, Loss: 0.2915
Epoch 3900, Loss: 0.2896
Epoch 4000, 

In [137]:
def accuracy(y_true, y_pred):
    """Fraction of rows whose arg-max position agrees in both arrays.

    Both arguments are reduced along axis 1 with ``argmax``; the result is
    the mean of the element-wise equality of those index vectors.
    """
    matches = np.argmax(y_pred, axis=1) == np.argmax(y_true, axis=1)
    return np.mean(matches)


In [138]:
# Score the held-out set with the trained parameters; the cache returned by
# forward is not needed here and is discarded.
y_test_pred, _ = forward(
    X=XT,
    W1=W1, b1=b1,
    W2=W2, b2=b2,
    W3=W3, b3=b3,
)
test_acc = accuracy(y_true=YT, y_pred=y_test_pred)
print("Test Accuracy:", test_acc)


Test Accuracy: 0.9174


  Z1 = X @ W1 + b1
  Z1 = X @ W1 + b1
  Z1 = X @ W1 + b1
  Z2 = A1 @ W2 + b2
  Z2 = A1 @ W2 + b2
  Z2 = A1 @ W2 + b2
  Z3 = A2 @ W3 + b3
  Z3 = A2 @ W3 + b3
  Z3 = A2 @ W3 + b3


In [139]:
# Persist every trained parameter to its own .npy file in the working
# directory, in the same order as the layers.
for fname, params in (
    ("W1.npy", W1),
    ("b1.npy", b1),
    ("W2.npy", W2),
    ("b2.npy", b2),
    ("W3.npy", W3),
    ("b3.npy", b3),
):
    np.save(fname, params)