In [39]:
import numpy as np
import pandas as pd

In [40]:
df = pd.read_csv("train.csv").to_numpy().T
paramSize, dataSize = df.shape


In [41]:
x = df[1:] / 255
y = df[0]

In [42]:
def one_hot(Y):
    one_hot_Y = np.zeros((Y.size, Y.max() + 1))
    one_hot_Y[np.arange(Y.size), Y] = 1
    one_hot_Y = one_hot_Y.T
    return one_hot_Y

def ReLU(Z, grad=False):
    if grad:
        return Z > 0
    return np.maximum(Z, 0)

def sigmoid(Z, grad=False):
    t = (1 / (1 + np.exp(-Z)))
    if grad:
        return t * (1- t)
    return t

def softmax(Z):
    A = np.exp(Z) / sum(np.exp(Z))
    return A

def get_predictions(A2):
    return np.argmax(A2, 0)

def get_accuracy(predictions, Y):
    return np.sum(predictions == Y) / Y.size

In [43]:
def parameters():
    W1 = np.random.rand(64, 784) - 0.5
    b1 = np.random.rand(64, 1) - 0.5
    W2 = np.random.rand(32, 64) - 0.5
    b2 = np.random.rand(32, 1) - 0.5
    W3 = np.random.rand(10, 32) - 0.5
    b3 = np.random.rand(10, 1) - 0.5
    return W1, b1, W2, b2, W3, b3

def forward(w1, b1, w2, b2, w3, b3, x):
    a1 = np.dot(w1, x) + b1
    z1 = ReLU(a1, grad=False)    
    a2 = np.dot(w2, z1) + b2
    z2 = ReLU(a2)
    a3 = np.dot(w3, z2) + b3
    z3 = softmax(a3)
    return a1, z1, a2, z2, a3, z3

def backward(a1, z1, z2, a2, w2, w3, z3, x, y):
    one_hot_Y = one_hot(y)
    loss = (z3 - one_hot_Y)
    dw3 = (1/dataSize) * np.dot(loss, z2.T)
    db3 = (1/dataSize) * np.sum(loss)
    da2 = ReLU(a2, grad=True) * np.dot(w3.T, loss)
    dw2 = (1/dataSize) * np.dot(da2, z1.T)
    db2 = (1/dataSize) * np.sum(da2)
    da1 = ReLU(a1, grad=True) * np.dot(w2.T, da2)
    dw1 = (1/dataSize) * np.dot(da1, x.T)
    db1 = (1/dataSize) * np.sum(da1)
    return dw1, db1, dw2, db2, dw3, db3

def train(x, y, epochs, lr):
    w1, b1, w2, b2, w3, b3 = parameters()
    for epoch in range(epochs+1):
        a1, z1, a2, z2, a3, z3 = forward(w1, b1, w2, b2, w3, b3, x)
        dw1, db1, dw2, db2, dw3, db3 = backward(a1, z1, z2, a2, w2, w3, z3, x, y)
        w1 -= dw1 * lr
        b1 -= db1 * lr
        w2 -= dw2 * lr
        b2 -= db2 * lr
        w3 -= dw3 * lr
        b3 -= db3 * lr

        if (epoch%10 == 0):
            pred = get_predictions(z3)
            acc = get_accuracy(pred, y)
            print(f"Epoch: {epoch}  Accuracy: {acc}")
    return w1, b1, w2, b2, w3, b3
    

In [44]:
w1, b1, w2, b2, w3, b3 = train(x, y, 2000, 0.1)

Epoch: 0  Accuracy: 0.07035714285714285
Epoch: 10  Accuracy: 0.31242857142857144
Epoch: 20  Accuracy: 0.46923809523809523
Epoch: 30  Accuracy: 0.5705238095238095
Epoch: 40  Accuracy: 0.635547619047619
Epoch: 50  Accuracy: 0.6811190476190476
Epoch: 60  Accuracy: 0.7126666666666667
Epoch: 70  Accuracy: 0.7359523809523809
Epoch: 80  Accuracy: 0.7548095238095238
Epoch: 90  Accuracy: 0.7684285714285715
Epoch: 100  Accuracy: 0.7806666666666666
Epoch: 110  Accuracy: 0.790047619047619
Epoch: 120  Accuracy: 0.7992619047619047
Epoch: 130  Accuracy: 0.8069047619047619
Epoch: 140  Accuracy: 0.813952380952381
Epoch: 150  Accuracy: 0.8197380952380953
Epoch: 160  Accuracy: 0.8254285714285714
Epoch: 170  Accuracy: 0.8300952380952381
Epoch: 180  Accuracy: 0.8348333333333333
Epoch: 190  Accuracy: 0.8385714285714285
Epoch: 200  Accuracy: 0.8424047619047619
Epoch: 210  Accuracy: 0.8462380952380952
Epoch: 220  Accuracy: 0.8495238095238096
Epoch: 230  Accuracy: 0.8525952380952381
Epoch: 240  Accuracy: 0.855

In [56]:
def over_train(w1, b1, w2, b2, w3, b3, x, y, epochs, lr):
    prev_acc = 0
    for epoch in range(epochs+1):
        a1, z1, a2, z2, a3, z3 = forward(w1, b1, w2, b2, w3, b3, x)
        dw1, db1, dw2, db2, dw3, db3 = backward(a1, z1, z2, a2, w2, w3, z3, x, y)
        w1 -= dw1 * lr
        b1 -= db1 * lr
        w2 -= dw2 * lr
        b2 -= db2 * lr
        w3 -= dw3 * lr
        b3 -= db3 * lr

        if (epoch%10 == 0):
            pred = get_predictions(z3)
            acc = get_accuracy(pred, y)
            imp = acc - prev_acc
            prev_acc = acc
            print(f"Epoch: {epoch}  Accuracy: {acc}  Improvement: {imp}")
    return w1, b1, w2, b2, w3, b3

In [58]:
w1, b1, w2, b2, w3, b3 = over_train(w1, b1, w2, b2, w3, b3, x, y, 300, 0.1)

Epoch: 0  Accuracy: 0.9972142857142857  Improvement: 0.9972142857142857
Epoch: 10  Accuracy: 0.9972142857142857  Improvement: 0.0
Epoch: 20  Accuracy: 0.9972142857142857  Improvement: 0.0
Epoch: 30  Accuracy: 0.9972142857142857  Improvement: 0.0
Epoch: 40  Accuracy: 0.9972380952380953  Improvement: 2.3809523809537048e-05
Epoch: 50  Accuracy: 0.9972380952380953  Improvement: 0.0
Epoch: 60  Accuracy: 0.9972380952380953  Improvement: 0.0
Epoch: 70  Accuracy: 0.9972380952380953  Improvement: 0.0
Epoch: 80  Accuracy: 0.9972380952380953  Improvement: 0.0
Epoch: 90  Accuracy: 0.9972380952380953  Improvement: 0.0
Epoch: 100  Accuracy: 0.9972380952380953  Improvement: 0.0
Epoch: 110  Accuracy: 0.9972380952380953  Improvement: 0.0
Epoch: 120  Accuracy: 0.9972380952380953  Improvement: 0.0
Epoch: 130  Accuracy: 0.9972619047619048  Improvement: 2.3809523809537048e-05
Epoch: 140  Accuracy: 0.9972619047619048  Improvement: 0.0
Epoch: 150  Accuracy: 0.9972619047619048  Improvement: 0.0
Epoch: 160  Ac

In [53]:
test = pd.read_csv("test.csv").to_numpy().T
test = test / 255

In [54]:
a1, z1, a2, z2, a3, z3 = forward(w1, b1, w2, b2, w3, b3, test)
test_pred = get_predictions(z3)

In [55]:
predictions = pd.DataFrame(test_pred)
predictions = pd.DataFrame({'ImageId': range(1, len(predictions) + 1), 'Label': predictions[0]})
predictions.to_csv("submission.csv", index=False)