In [6]:
import numpy as np 
import pandas as pd 

In [10]:
# Read the digit-recognizer training CSV into a pandas DataFrame.
data = pd.read_csv(filepath_or_buffer="/kaggle/input/digit-recognizer/train.csv")
# Last expression of the cell: preview the first rows of the frame.
data.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [11]:
# Convert `data` (the DataFrame returned by pd.read_csv in the earlier cell) into a
# NumPy array. Note that this rebinds the name `data` from a DataFrame to an ndarray,
# so every later cell sees the array, not the frame.
data = np.array(data)

In [24]:
# Seed NumPy's global RNG so the shuffle below (and any later np.random call made
# after this cell) gives the same result on every Restart & Run All.
SEED = 42
np.random.seed(SEED)

# Number of shuffled rows held out for validation; all remaining rows are used for training.
VALIDATION_SIZE = 1000

np.random.shuffle(data)  # shuffles the rows of `data` in place
m, n = data.shape  # m is the number of examples, n is the number of pixels + 1 for the label
if m <= VALIDATION_SIZE:
    raise ValueError(
        f"need more than {VALIDATION_SIZE} rows to build a non-empty training set, got {m}"
    )

# Transpose each split so that every column is one example:
# row 0 holds the labels, rows 1..n-1 hold the pixel values.
validation_data = data[:VALIDATION_SIZE].T
val_y = validation_data[0]
val_x = validation_data[1:n]
val_x = val_x / 255.0  # scale pixels by 1/255 (assumes 8-bit intensities in [0, 255])

training_data = data[VALIDATION_SIZE:m].T
train_y = training_data[0]
train_x = training_data[1:n]
train_x = train_x / 255.0  # same scaling as the validation split


In [32]:

def init_params(n_input=784, n_hidden=10, n_output=10):
    """Create randomly initialised weights and biases for a two-layer network.

    Every entry is drawn uniformly from [-0.5, 0.5) using NumPy's global RNG
    (``np.random.rand``), so seed it beforehand for reproducible results.

    Parameters
    ----------
    n_input : int, default 784
        Number of input features (rows of X fed to the first layer).
    n_hidden : int, default 10
        Number of units in the hidden layer.
    n_output : int, default 10
        Number of output units (classes).

    Returns
    -------
    tuple of ndarray
        ``(W1, b1, W2, b2)`` with shapes ``(n_hidden, n_input)``,
        ``(n_hidden, 1)``, ``(n_output, n_hidden)`` and ``(n_output, 1)``.
    """
    # The draw order (W1, b1, W2, b2) is kept fixed so that, for a given seed,
    # the default call yields the same values as the hard-coded version did.
    W1 = np.random.rand(n_hidden, n_input) - 0.5
    b1 = np.random.rand(n_hidden, 1) - 0.5
    W2 = np.random.rand(n_output, n_hidden) - 0.5
    b2 = np.random.rand(n_output, 1) - 0.5
    return W1, b1, W2, b2

def ReLU(Z1):
    """Rectified linear unit: element-wise maximum of 0 and ``Z1``."""
    rectified = np.maximum(0, Z1)
    return rectified

def softmax(Z):
    """Column-wise softmax of ``Z`` (normalised along axis 0).

    The per-column maximum is subtracted before exponentiating so that the
    largest exponent is 0, which avoids overflow in ``np.exp``.
    """
    column_max = np.max(Z, axis=0, keepdims=True)
    exponentials = np.exp(Z - column_max)
    column_totals = np.sum(exponentials, axis=0, keepdims=True)
    return exponentials / column_totals
    
def forward_prop(W1,b1,W2,b2,X):
    """Run one forward pass through the two-layer network.

    Computes ``W1.dot(X) + b1``, applies ReLU, then computes
    ``W2.dot(...) + b2`` and applies softmax.

    Returns
    -------
    tuple
        ``(Z1, A1, Z2, A2)``: hidden pre-activation, hidden activation,
        output pre-activation and softmax output, in that order.
    """
    hidden_pre = W1.dot(X) + b1
    hidden_act = ReLU(hidden_pre)
    output_pre = W2.dot(hidden_act) + b2
    output_prob = softmax(output_pre)
    return hidden_pre, hidden_act, output_pre, output_prob

def one_hot(Y, num_classes=None):
    """One-hot encode integer class labels, one column per example.

    Parameters
    ----------
    Y : array-like of int
        Class labels, expected to lie in ``[0, num_classes)``.
    num_classes : int, optional
        Number of rows (classes) in the result. When omitted it is inferred as
        ``Y.max() + 1``, which matches the previous behaviour. Pass it
        explicitly when a batch may not contain the highest label, otherwise
        the result has too few rows to line up with the network output.

    Returns
    -------
    ndarray of float, shape ``(num_classes, Y.size)``
        Entry ``[k, i]`` is 1 where example ``i`` has label ``k``, else 0.

    Raises
    ------
    ValueError
        If ``Y`` is empty and ``num_classes`` is not given, or if any label
        falls outside ``[0, num_classes)``.
    """
    labels = np.asarray(Y)
    if num_classes is None:
        if labels.size == 0:
            raise ValueError(
                "one_hot: cannot infer the number of classes from empty labels; pass num_classes"
            )
        num_classes = labels.max() + 1
    # Negative labels would otherwise wrap around silently via negative indexing.
    if labels.size and (labels.min() < 0 or labels.max() >= num_classes):
        raise ValueError(
            f"one_hot: labels must lie in [0, {num_classes}), "
            f"got range [{labels.min()}, {labels.max()}]"
        )
    # Build as (examples, classes), then transpose so each column is one example.
    one_hot_y = np.zeros((labels.size, num_classes))
    one_hot_y[np.arange(labels.size), labels] = 1
    return one_hot_y.T

def deriv_ReLU(Z):
    """Derivative of ReLU as a boolean mask: True where ``Z`` is strictly positive."""
    positive_mask = Z > 0
    return positive_mask

def back_prop(Z1, A1, Z2, A2, W2, X, Y):
    """Compute parameter gradients for the two-layer network.

    The output error is ``A2 - one_hot(Y)``; it is pushed back through ``W2``
    and masked by the ReLU derivative of ``Z1`` to get the hidden-layer error.
    Each gradient is scaled by ``1 / Y.size``. ``Z2`` is accepted but not used.

    Returns
    -------
    tuple
        ``(dW1, db1, dW2, db2)``.
    """
    inv_m = 1/Y.size
    targets = one_hot(Y)

    # Error signals of the output layer and of the hidden layer.
    output_error = A2 - targets
    hidden_error = W2.T.dot(output_error) * deriv_ReLU(Z1)

    # Second-layer gradients.
    dW2 = inv_m * output_error.dot(A1.T)
    db2 = inv_m * np.sum(output_error, axis=1, keepdims=True)

    # First-layer gradients.
    dW1 = inv_m * hidden_error.dot(X.T)
    db1 = inv_m * np.sum(hidden_error, axis=1, keepdims=True)
    return dW1, db1, dW2, db2

def update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha):
    """Apply one gradient-descent step: each parameter minus ``alpha`` times its gradient.

    Returns the updated ``(W1, b1, W2, b2)`` as a tuple; inputs are not modified.
    """
    parameters = (W1, b1, W2, b2)
    gradients = (dW1, db1, dW2, db2)
    return tuple(
        param - alpha * grad
        for param, grad in zip(parameters, gradients)
    )

def get_predictions(A2):
    """Return, for each column of ``A2``, the row index holding the largest value."""
    predicted_rows = np.argmax(A2, axis=0)
    return predicted_rows

def get_accuracy(prediction, Y):
    """Return the number of entries where ``prediction == Y`` divided by ``Y.size``."""
    matches = prediction == Y
    n_correct = np.sum(matches)
    return n_correct / Y.size
    

def gradient_descent(X, Y, alpha, iterations):
    """Train the two-layer network with full-batch gradient descent.

    Starts from ``init_params()`` and repeats forward pass, back-propagation
    and parameter update ``iterations`` times with learning rate ``alpha``.
    Every 10th iteration the accuracy of that iteration's forward pass
    (computed before the update is applied) is printed.

    Returns
    -------
    tuple
        The final ``(W1, b1, W2, b2)``.
    """
    params = init_params()
    for step in range(iterations):
        Z1, A1, Z2, A2 = forward_prop(*params, X)
        # params[2] is W2, the only parameter back_prop needs directly.
        grads = back_prop(Z1, A1, Z2, A2, params[2], X, Y)
        params = update_params(*params, *grads, alpha)
        if step % 10 == 0:
            print("iteration ", step)
            print("accuracy ", get_accuracy(get_predictions(A2), Y))

    W1, b1, W2, b2 = params
    return W1, b1, W2, b2

# Train on the training split: learning rate 0.1 for 500 iterations.
W1, b1, W2, b2 = gradient_descent(
    train_x,
    train_y,
    alpha=0.1,
    iterations=500,
)
    
    

iteration  0
accuracy  0.09746341463414634
iteration  10
accuracy  0.16478048780487806
iteration  20
accuracy  0.25514634146341464
iteration  30
accuracy  0.35709756097560974
iteration  40
accuracy  0.43634146341463415
iteration  50
accuracy  0.5009268292682927
iteration  60
accuracy  0.5540243902439025
iteration  70
accuracy  0.5940243902439024
iteration  80
accuracy  0.6250487804878049
iteration  90
accuracy  0.6497804878048781
iteration  100
accuracy  0.6717804878048781
iteration  110
accuracy  0.6885121951219512
iteration  120
accuracy  0.7032195121951219
iteration  130
accuracy  0.7165609756097561
iteration  140
accuracy  0.7269756097560975
iteration  150
accuracy  0.7363658536585366
iteration  160
accuracy  0.7460731707317073
iteration  170
accuracy  0.7546829268292683
iteration  180
accuracy  0.7620243902439024
iteration  190
accuracy  0.7685609756097561
iteration  200
accuracy  0.7749756097560976
iteration  210
accuracy  0.7797804878048781
iteration  220
accuracy  0.78514634146

In [33]:
def make_prediction(X, W1, b1, W2, b2):
    """Run a forward pass on ``X`` and return the predicted class index for each column."""
    _z1, _a1, _z2, output_prob = forward_prop(W1, b1, W2, b2, X)
    return get_predictions(output_prob)

# Evaluate the trained parameters on the held-out validation split.
validation_predictions = make_prediction(X=val_x, W1=W1, b1=b1, W2=W2, b2=b2)
print(get_accuracy(prediction=validation_predictions, Y=val_y))

0.848
