In [122]:
import numpy as np
import pandas as pd

# Training split, read from a CSV in the working directory.  Later cells rely
# on a 'label' column and on columns whose names start with 'pixel'.
traindf = pd.read_csv(filepath_or_buffer="fashion-mnist_train.csv")

In [123]:
# Network dimensions, derived from the training frame.
labels = sorted(traindf['label'].unique())          # distinct class ids, ascending
n_in = sum(1 for col in traindf.columns if col.startswith('pixel'))  # one input per pixel column
n_out = len(labels)                                 # one output unit per class
n_hidden = 128                                      # width of the single hidden layer

In [124]:
# One-hot encode the target: add one 0/1 integer column `y_<label>` per class.
# Plain column assignment is idempotent: re-running this cell overwrites the
# same columns instead of appending duplicates, so the width of the target
# matrix built from these columns stays at n_out.
for i in labels:
    traindf[f'y_{i}'] = (traindf['label'] == i).astype(int)

In [125]:
# Design matrix: pixel columns scaled by 1/255.  Targets: the one-hot y_* columns.
pixel_cols = [f'pixel{i}' for i in range(1, n_in+1)]
target_cols = [f'y_{i}' for i in range(n_out)]

X = traindf[pixel_cols].to_numpy() / 255.0
Y = traindf[target_cols].to_numpy()

In [173]:
# Xavier (Glorot) normal initialisation: each weight matrix is drawn from a
# zero-mean normal with std sqrt(2 / (fan_in + fan_out)); biases start at zero.
# The generator is seeded so the initial parameters are the same on every run.
rng = np.random.default_rng(42)

std_hidden = np.sqrt(2/(n_in+n_hidden))
std_out = np.sqrt(2/(n_hidden+n_out))

# Draw order (hidden weights first, then output weights) determines the values.
Wh = rng.normal(loc=0, scale=std_hidden, size=(n_in, n_hidden)).astype(np.float32)
Wo = rng.normal(loc=0, scale=std_out, size=(n_hidden, n_out)).astype(np.float32)

bh = np.zeros(n_hidden, dtype=np.float32)
bo = np.zeros(n_out, dtype=np.float32)

In [157]:
def sigmoid(z):
    """Logistic function 1 / (1 + exp(-z)), evaluated without overflow.

    Parameters
    ----------
    z : scalar or array-like
        Pre-activation value(s).

    Returns
    -------
    Same shape as ``z`` (a NumPy scalar for scalar input), values in [0, 1].
    """
    z = np.asarray(z)
    # exp is only ever taken of a non-positive number, so it stays in (0, 1]
    # and cannot overflow, unlike exp(-z) for large negative z.
    e = np.exp(-np.abs(z))
    out = np.where(z >= 0, 1/(1 + e), e/(1 + e))
    return out[()]   # 0-d result -> scalar; n-d arrays are returned as arrays


def dsigmoid(z):
    """Derivative of the logistic function, sigmoid(z) * (1 - sigmoid(z))."""
    s = sigmoid(z)   # evaluate once and reuse
    return s*(1 - s)

In [149]:
def forward(X):
    """Forward pass of the two-layer sigmoid network.

    Reads the module-level parameters ``Wh``, ``bh``, ``Wo`` and ``bo``.

    Parameters
    ----------
    X : array of shape (m, n_in)
        Batch of input rows.

    Returns
    -------
    (O, cache)
        ``O`` is the (m, n_out) array of output activations; ``cache`` is the
        tuple ``(X, Zh, H, Zo)`` of intermediates consumed by ``backward``.
    """
    hidden_pre = X @ Wh + bh              # (m, n_hidden) pre-activations
    hidden_act = sigmoid(hidden_pre)
    out_pre = hidden_act @ Wo + bo        # (m, n_out) pre-activations
    cache = (X, hidden_pre, hidden_act, out_pre)
    return sigmoid(out_pre), cache

def backward(Y, cache):
    """Backward pass: gradients of the squared error w.r.t. all parameters.

    Reads the module-level output weights ``Wo``.

    Parameters
    ----------
    Y : array of shape (m, n_out)
        Target rows for the batch.
    cache : tuple
        ``(X, Zh, H, Zo)`` as returned by ``forward`` for the same batch.

    Returns
    -------
    (dWh, dbh, dWo, dbo)
        Gradients shaped like ``Wh``, ``bh``, ``Wo`` and ``bo``.  They are
        averaged over the m rows of the batch but summed over the output
        units, not additionally divided by n_out.
    """
    X, Zh, H, Zo = cache
    m = X.shape[0]

    # sigmoid'(z) = s * (1 - s) with s = sigmoid(z).  The output activation is
    # evaluated once here and the hidden activation H comes from the cache, so
    # no exponential is recomputed.
    O = sigmoid(Zo)

    dZo = 2*(O - Y) * (O*(1 - O))               # (m,n_out)
    dWo = (H.T @ dZo) / m
    dbo = dZo.mean(axis=0)

    dZh = (dZo @ Wo.T) * (H*(1 - H))            # (m,n_hidden)
    dWh = (X.T @ dZh) / m
    dbh = dZh.mean(axis=0)
    return dWh, dbh, dWo, dbo

In [174]:
# Mean squared error of the network, with its current parameters, over the
# whole training set.
O, _ = forward(X)
mse = np.square(O - Y).mean()
print(f"mse={mse}")

mse=0.2877080212738844


In [210]:
# Mini-batch gradient descent on the squared error.
# NOTE: Wh, bh, Wo and bo are updated in place, so re-running this cell
# continues training from the current parameters; re-run the initialisation
# cell first to start from scratch.
lr     = 0.1    # learning rate
epochs = 200    # full passes over the training set
batch  = 100    # rows per mini-batch

n_samples = X.shape[0]

for epoch in range(1, epochs+1):
    # Shuffle with the seeded generator `rng` (not the unseeded global
    # np.random state) so the batch order is reproducible.
    idx = rng.permutation(n_samples)
    for s in range(0, n_samples, batch):
        sel = idx[s:s+batch]              # the last batch may be shorter
        Xb, Yb = X[sel], Y[sel]

        _, cache = forward(Xb)
        gWh, gbh, gWo, gbo = backward(Yb, cache)

        Wh -= lr * gWh
        bh -= lr * gbh
        Wo -= lr * gWo
        bo -= lr * gbo

    # Progress report: MSE over the full training set after each epoch.
    O, _ = forward(X)
    mse = np.mean((O - Y)**2)
    print(f"epoch {epoch}   mse={mse}")


epoch 1   mse=0.00636460952301675
epoch 2   mse=0.0064409437641651
epoch 3   mse=0.006349809194943744
epoch 4   mse=0.006348969151066984
epoch 5   mse=0.006311244686764653
epoch 6   mse=0.006309087757872851
epoch 7   mse=0.006283105428652491
epoch 8   mse=0.006306483189697265
epoch 9   mse=0.0063143174313165604
epoch 10   mse=0.0062855287135852814
epoch 11   mse=0.006289022414771383
epoch 12   mse=0.006277081617475266
epoch 13   mse=0.006260934078232881
epoch 14   mse=0.006284387808541955
epoch 15   mse=0.0063244194323751265
epoch 16   mse=0.006228854782115879
epoch 17   mse=0.006236719946780898
epoch 18   mse=0.006237359958948879
epoch 19   mse=0.006226915042717576
epoch 20   mse=0.006204102058775497
epoch 21   mse=0.0062269440185615515
epoch 22   mse=0.0062063595624193574
epoch 23   mse=0.006185622819842631
epoch 24   mse=0.006202265458981526
epoch 25   mse=0.006222175370937365
epoch 26   mse=0.006154071239413308
epoch 27   mse=0.006157370704075574
epoch 28   mse=0.006266362148551282

In [134]:
# Load the test split and one-hot encode its labels against the class list
# taken from the training data, so both frames get the same y_* columns even
# if a class is absent from the test file.  Column assignment is idempotent:
# re-running the cell never produces duplicate y_* columns.
testdf = pd.read_csv("fashion-mnist_test.csv")
for i in labels:
    testdf[f'y_{i}'] = (testdf['label'] == i).astype(int)

In [135]:
# Test design matrix (pixels scaled by 1/255) and one-hot test targets, built
# from the same column names as the training matrices.
test_pixel_cols = [f'pixel{i}' for i in range(1, n_in+1)]
test_target_cols = [f'y_{i}' for i in range(n_out)]

X_test = testdf[test_pixel_cols].to_numpy() / 255.0
Y_test = testdf[test_target_cols].to_numpy()

In [211]:
# Evaluate on the test set: the predicted class is the index of the largest
# output unit, compared against the integer 'label' column.
O_test, _ = forward(X_test)
classes_preds = np.argmax(O_test, axis=1)
classes = testdf['label'].to_numpy()

# Vectorised accuracy: fraction of matching predictions, as a percentage.
pct_accuracy = float(np.mean(classes == classes_preds) * 100)
print(pct_accuracy)
mse = np.mean((O_test - Y_test)**2)
print(f"mse={mse}")

90.08
mse=0.015722601611585837
