This code is taken from https://www.youtube.com/watch?v=w8yWXqWQYmU&t=1549s

In [2]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import os

In [4]:
dir = os.getcwd() + '\\data'
df = pd.read_csv(dir+'\\train.csv')
df.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [5]:
df = np.array(df)
m,n = df.shape
np.random.shuffle(df)

dfDev = df[0:1000].T
yDev = dfDev[0]
xDev = dfDev[1:n]
xDev = xDev/255.

dfTrain = df[1000:m].T
yTrain = dfTrain[0]
xTrain = dfTrain[1:n]
xTrain = xTrain/255.

In [12]:
def init_params():
    w1 = np.random.rand(10,n-1)-0.5
    b1 = np.random.rand(10,1)-0.5
    w2 = np.random.rand(10,10)-0.5
    b2 = np.random.rand(10,1)-0.5
    return w1,b1,w2,b2

def ReLu(x):
    return np.maximum(x,0,x)

def softMax(x):
    return np.exp(x)/sum(np.exp(x))

def forward_prop(w1,b1,w2,b2,x):
    z1 = w1.dot(x) + b1
    a1 = ReLu(z1)
    z2 = w2.dot(a1) + b2
    a2 = softMax(z2)
    return z1,a1,z2,a2

def deriv_ReLu(x):
    return x > 0

def one_hot(y):
    oneHoty = np.zeros((y.size,y.max()+1))
    oneHoty[np.arange(y.size),y] = 1
    oneHoty = oneHoty.T
    return oneHoty

def back_prop(z1,a1,z2,a2,w2,x,y):
    oneHoty = one_hot(y)
    dZ2 = a2 - oneHoty
    dW2 = 1 / y.size * dZ2.dot(a1.T)
    dB2 = 1 / y.size * np.sum(dZ2)
    dZ1 = w2.T.dot(dZ2) * deriv_ReLu(z1)
    dW1 = 1 / y.size * dZ1.dot(x.T)
    dB1 = 1 / y.size * np.sum(dZ1)
    return dW1,dB1,dW2,dB2

def update_params(w1,b1,w2,b2,dW1,dB1,dW2,dB2,alpha):
    w1 = w1 - dW1 * alpha
    b1 = b1 - dB1 * alpha
    w2 = w2 - dW2 * alpha
    b2 = b2 - dB2 * alpha
    return w1,b1,w2,b2

def get_predictions(a2):
    return np.argmax(a2,0)

def get_accuracy(predictions,y):
    print(predictions, y)
    return np.sum(predictions == y) / y.size

def gradient_descent(x,y,nit,alpha):
    w1,b1,w2,b2 = init_params()
    for i in range(nit):
        z1,a1,z2,a2 = forward_prop(w1,b1,w2,b2,x)
        dW1,dB1,dW2,dB2 = back_prop(z1,a1,z2,a2,w2,x,y)
        w1,b1,w2,b2 = update_params(w1,b1,w2,b2,dW1,dB1,dW2,dB2,alpha)
        if i % 50 == 0:
            print("Iteration: ", i)
            print("Accuracy: ", get_accuracy(get_predictions(a2),y))
    return w1,b1,w2,b2

In [14]:
w1,b1,w2,b2 = gradient_descent(xTrain, yTrain, 1000, 0.1)

Iteration:  0
[4 4 4 ... 4 4 4] [2 7 0 ... 0 6 0]
Accuracy:  0.0951219512195122
Iteration:  50
[4 4 2 ... 0 5 0] [2 7 0 ... 0 6 0]
Accuracy:  0.44170731707317074
Iteration:  100
[4 4 6 ... 0 5 0] [2 7 0 ... 0 6 0]
Accuracy:  0.604
Iteration:  150
[4 9 6 ... 0 5 0] [2 7 0 ... 0 6 0]
Accuracy:  0.6979024390243902
Iteration:  200
[4 9 0 ... 0 5 0] [2 7 0 ... 0 6 0]
Accuracy:  0.752219512195122
Iteration:  250
[4 9 0 ... 0 5 0] [2 7 0 ... 0 6 0]
Accuracy:  0.7863902439024391
Iteration:  300
[4 9 0 ... 0 5 0] [2 7 0 ... 0 6 0]
Accuracy:  0.8097317073170732
Iteration:  350
[4 7 0 ... 0 5 0] [2 7 0 ... 0 6 0]
Accuracy:  0.8263414634146341
Iteration:  400
[4 7 0 ... 0 5 0] [2 7 0 ... 0 6 0]
Accuracy:  0.8374634146341463
Iteration:  450
[4 7 0 ... 0 5 0] [2 7 0 ... 0 6 0]
Accuracy:  0.8451951219512195
Iteration:  500
[4 7 0 ... 0 5 0] [2 7 0 ... 0 6 0]
Accuracy:  0.8525853658536585
Iteration:  550
[4 7 0 ... 0 5 0] [2 7 0 ... 0 6 0]
Accuracy:  0.858
Iteration:  600
[4 7 0 ... 0 8 0] [2 7 0 ... 