In [485]:
import numpy as np
import keras

In [486]:
(X_train, y_train), (X_test, y_test) = keras.datasets.mnist.load_data()

In [487]:
X_train.shape, y_train.shape, X_test.shape, y_test.shape

((60000, 28, 28), (60000,), (10000, 28, 28), (10000,))

In [488]:
def categorical(y):
    a = np.zeros(y.shape+(10,))
    n = y.shape[0]
    for i in range(n):
        a[i][y[i]] = 1
    return a

In [489]:
X_train = X_train.reshape((60000,1,784))/255
X_test = X_test.reshape((10000,1,784))/255
y_train = categorical(y_train)
y_test = categorical(y_test)

In [490]:
sigmoid = lambda z: 1/(1+np.exp(-z))
D_sigmoid = lambda z: sigmoid(z)*(1-sigmoid(z))

In [491]:
def softmax(z):
    e = np.exp(z- np.max(z))
    return e/np.sum(e)

In [492]:
relu = lambda z: np.maximum(0,z)
D_relu = lambda z: np.where(z > 0, 1, 0)

In [493]:
w1 = np.random.random((28*28, 64))
w2 = np.random.random((64, 10))
b1 = np.random.random((1,64))
b2 = np.random.random((1,10))

In [494]:
def forwadprop(inpt,w1,w2,b1,b2):
    z1 = np.dot(inpt, w1) + b1
    o1 = relu(z1)
    z2 = np.dot(o1, w2) + b2
    o2 = sigmoid(z2)
    return z1, o1, z2, o2

In [495]:
forwadprop(X_train[0],w1,w2,b1,b2)

(array([[53.08399386, 54.41538575, 56.59819484, 57.24994369, 54.31071604,
         54.39733042, 59.32823098, 56.18316099, 52.23957931, 56.70038958,
         51.96635464, 53.20944178, 53.44900097, 54.72778333, 51.85262223,
         53.05016685, 51.72017455, 52.9331423 , 56.99602398, 54.95701023,
         53.78168252, 53.67308526, 51.01503466, 59.49724621, 60.54210614,
         56.12571819, 52.38321804, 55.96861562, 52.98066626, 58.06156664,
         52.04701119, 49.48947337, 54.48132024, 53.94221097, 51.49642752,
         48.24138447, 56.65546637, 55.30780337, 55.48905904, 56.75890554,
         54.3922564 , 59.43506824, 50.99418879, 52.48611355, 59.58972984,
         55.12247732, 54.83247918, 49.05009795, 58.66445671, 53.96585625,
         50.652149  , 56.16536318, 51.91051825, 62.1409671 , 53.87827245,
         59.91740296, 59.00292825, 54.99258965, 55.25688253, 56.79712674,
         54.74354295, 54.5881383 , 51.65505342, 53.5715266 ]]),
 array([[53.08399386, 54.41538575, 56.59819484, 

In [496]:
def logloss(y,o2):
    loss = -y*np.log10(o2+1e-15) - (1-y)*np.log10(1-o2+1e-15)
    return loss

def crossentropyloss(y,o2):
    n = y.shape[0]
    loss = -np.sum(y * np.log(o2 + 1e-15))/n
    return loss

In [497]:
def backprop(x,y,w1,w2,o1,o2,z1,z2):
    err = o2 - y
    do2 = err*D_sigmoid(o2)
    do1 = do2.dot(w2.T) * D_sigmoid(o1)

    dw2 = o1.T.dot(do2)
    dw1 = x.T.dot(do1)

    db2 = do2
    db1 = do1
    
    return dw1, db1, dw2, db2

In [498]:
def predict(X):
    pred = []
    for i in X:
        _,_,_, output = forwadprop(i,w1,w2,b1,b2)
        pred.append(output)
    return np.array(pred)

In [499]:
def train(X,Y,w1,w2,b1,b2,lr=0.001,epochs=30):
    n = X.shape[0]
    for j in range(epochs):
        print(f"epoch: {j+1}")
        total_loss = 0
        for i in range(n):
            z1, o1, z2, o2 = forwadprop(X[i],w1,w2,b1,b2)
            dw1, db1, dw2, db2 = backprop(X[i],Y[i],w1,w2,o1,o2,z1,z2)
            w1 -= lr*dw1
            w2 -= lr*dw2
            b1 -= lr*db1
            b2 -= lr*db2
            total_loss += crossentropyloss(Y[i], o2)/n
        y_pred = predict(X)
        predictions = [np.argmax(i) for i in y_pred]
        accuracy = np.mean(predictions == np.argmax(Y, axis=1))
        print(F"accuracy: {accuracy}")
        print(f"loss: {total_loss}")

In [500]:
train(X_train, y_train, w1,w2,b1,b2)

epoch: 1
accuracy: 0.6674666666666667
loss: 0.9660165757487037
epoch: 2
accuracy: 0.7426666666666667
loss: 0.681912860173418
epoch: 3
accuracy: 0.76975
loss: 0.6205971833119931
epoch: 4
accuracy: 0.7851666666666667
loss: 0.5864160148048536
epoch: 5
accuracy: 0.7650333333333333
loss: 0.5690489281787364
epoch: 6
accuracy: 0.7706333333333333
loss: 0.5565535241759817
epoch: 7
accuracy: 0.7644333333333333
loss: 0.5454772972400749
epoch: 8
accuracy: 0.7882
loss: 0.5407898674043932
epoch: 9
accuracy: 0.77525
loss: 0.5329061264098608
epoch: 10
accuracy: 0.7842
loss: 0.5265484971718744
epoch: 11
accuracy: 0.7845666666666666
loss: 0.5201352562138497
epoch: 12
accuracy: 0.7724166666666666
loss: 0.5192465048481186
epoch: 13
accuracy: 0.7605666666666666
loss: 0.5147386008965925
epoch: 14
accuracy: 0.7697666666666667
loss: 0.5135258047821752
epoch: 15
accuracy: 0.7699833333333334
loss: 0.5106251798187856
epoch: 16
accuracy: 0.7596
loss: 0.5104661650472136
epoch: 17
accuracy: 0.7679
loss: 0.507256377

### The highest accuracy was 0.7938 percent which is okay for a simple Feed Forward Neural Network