In [24]:
import os
import numpy as np
from PIL import Image

np.random.seed(42)


In [27]:
def load_data(base_path):
    X, y = [], []
    labels = {}

    class_names = sorted(os.listdir(base_path))

    for idx, cname in enumerate(class_names):
        labels[idx] = cname
        folder = os.path.join(base_path, cname)

        for img_name in os.listdir(folder):
            img = Image.open(os.path.join(folder, img_name)).convert("L")
            img = img.resize((8, 8))  # CRITICAL
            arr = np.array(img, dtype=np.float32).flatten()
            arr = (arr - np.mean(arr)) / (np.std(arr) + 1e-8)
            X.append(arr)
            y.append(idx)

    X = np.array(X)
    y = np.array(y)

    perm = np.random.permutation(len(X))
    return X[perm], y[perm], labels


In [30]:
X_train, y_train, labels = load_data("Train")
X_test, _, _ = load_data("Test")

print("Train shape:", X_train.shape)
print("Classes:", labels)


Train shape: (4000, 64)
Classes: {0: 'Jade', 1: 'James', 2: 'Jane', 3: 'Joel', 4: 'Jovi'}


In [32]:
input_dim = X_train.shape[1]
hidden_dim = 256
num_classes = len(labels)

W1 = np.random.randn(input_dim, hidden_dim) * np.sqrt(2 / input_dim)
b1 = np.zeros((1, hidden_dim))

W2 = np.random.randn(hidden_dim, num_classes) * np.sqrt(2 / hidden_dim)
b2 = np.zeros((1, num_classes))


In [25]:
def relu(z):
    return np.maximum(0, z)

def softmax(z):
    z = z - np.max(z, axis=1, keepdims=True)  # stability
    exp = np.exp(z)
    return exp / np.sum(exp, axis=1, keepdims=True)

def cross_entropy(y_pred, y_true):
    m = y_true.shape[0]
    return -np.mean(np.log(y_pred[range(m), y_true] + 1e-9))

In [33]:
lr = 0.1
epochs = 300

for epoch in range(epochs):
    # ---------- Forward ----------
    z1 = X_train @ W1 + b1
    a1 = relu(z1)
    z2 = a1 @ W2 + b2
    y_hat = softmax(z2)

    loss = cross_entropy(y_hat, y_train)

    # ---------- Backward ----------
    m = X_train.shape[0]

    dz2 = y_hat
    dz2[range(m), y_train] -= 1
    dz2 /= m

    dW2 = a1.T @ dz2
    db2 = np.sum(dz2, axis=0, keepdims=True)

    da1 = dz2 @ W2.T
    dz1 = da1 * (z1 > 0)

    dW1 = X_train.T @ dz1
    db1 = np.sum(dz1, axis=0, keepdims=True)

    # ---------- Update ----------
    W1 -= lr * dW1
    b1 -= lr * db1
    W2 -= lr * dW2
    b2 -= lr * db2

    if (epoch + 1) % 20 == 0:
        print(f"Epoch {epoch+1}/{epochs}, Loss: {loss:.4f}")


Epoch 20/300, Loss: 0.9164
Epoch 40/300, Loss: 0.6585
Epoch 60/300, Loss: 0.5321
Epoch 80/300, Loss: 0.4630
Epoch 100/300, Loss: 0.4166
Epoch 120/300, Loss: 0.3831
Epoch 140/300, Loss: 0.3576
Epoch 160/300, Loss: 0.3377
Epoch 180/300, Loss: 0.3218
Epoch 200/300, Loss: 0.3085
Epoch 220/300, Loss: 0.2973
Epoch 240/300, Loss: 0.2876
Epoch 260/300, Loss: 0.2790
Epoch 280/300, Loss: 0.2712
Epoch 300/300, Loss: 0.2641


In [34]:
print("ReLU active %:", np.mean(a1 > 0))


ReLU active %: 0.4431240234375


In [35]:
z1_train = X_train @ W1 + b1
a1_train = relu(z1_train)
z2_train = a1_train @ W2 + b2

train_preds = np.argmax(z2_train, axis=1)
print("Training accuracy:", np.mean(train_preds == y_train))


Training accuracy: 0.91875


In [36]:
z1_test = X_test @ W1 + b1
a1_test = relu(z1_test)
z2_test = a1_test @ W2 + b2

test_preds = np.argmax(z2_test, axis=1)
predicted_classes = [labels[p] for p in test_preds]

predicted_classes[:10]

['James',
 'Jade',
 'Jade',
 'Jade',
 'James',
 'James',
 'Jade',
 'Jade',
 'James',
 'Jovi']