In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
df = pd.read_csv("/kaggle/input/digit-recognizer/train.csv")
df.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [3]:
df = np.array(df)
m,n=df.shape
np.random.shuffle(df)

data_test = df[0:1000].T
test_x = data_test[1:n]
test_x = test_x / 255
test_y = data_test[0]

data_train = df[1000:m].T
train_x = data_train[1:n]
train_x = train_x / 255
train_y = data_train[0]


In [12]:
def initialize_parameters():
    W1 = np.random.randn(10, 784) * np.sqrt(2.0 / 784)
    b1 = np.zeros((10, 1))
    W2 = np.random.randn(10, 10) * np.sqrt(2.0 / 10)
    b2 = np.zeros((10, 1))
    return W1, b1, W2, b2

def ReLU(Z):
    return np.maximum(0,Z)

def softmax(Z):
    # Z shape: (num_classes, m)
    # subtract max per column for numerical stability
    Z_shift = Z - np.max(Z, axis=0, keepdims=True)
    expZ = np.exp(Z_shift)
    return expZ / np.sum(expZ, axis=0, keepdims=True)

def forward_propagation(W1,b1,W2,b2,X):
    Z1 = W1.dot(X) + b1
    A1 = ReLU(Z1)
    Z2 = W2.dot(A1) + b2
    A2 = softmax(Z2)
    return Z1,A1,Z2,A2

def onehot(Y): 
    onehot_y = np.zeros((Y.size,Y.max()+1)) 
    onehot_y[np.arange(Y.size),Y]=1 
    return onehot_y.T

def deriv_ReLU(Z):
    return Z>0

def backward_propagation(Z1,A1,Z2,A2,W2,X,Y):
    m=Y.size
    onehot_y = onehot(Y)
    dZ2 = A2 - onehot_y
    dW2=1/m * (dZ2.dot(A1.T)) 
    db2 = 1/m * np.sum(dZ2,axis=1,keepdims=True)
    dZ1 = W2.T.dot(dZ2) * deriv_ReLU(Z1)
    dW1=1/m * (dZ1.dot(X.T))
    db1 = 1/m * np.sum(dZ1,axis=1,keepdims=True)
    return dW1, db1,dW2,db2

def update_params(W1,b1,W2,b2,dW1,db1,dW2,db2,alpha):
    W1 = W1 - alpha*dW1
    b1 = b1- alpha *db1
    W2 = W2 -alpha* dW2
    b2 = b2-alpha* db2
    return W1,b1,W2,b2

In [5]:
def get_predictions (A2):
    return np.argmax(A2, 0)

def get_accuracy (predictions, Y):
    print(predictions, Y)
    return np.sum(predictions == Y) / Y.size

def grad_descent(X,Y,iterations,alpha):
    W1, b1, W2, b2 = initialize_parameters()
    for i in range(iterations):
        Z1, A1, Z2, A2 = forward_propagation(W1, b1, W2, b2, X)
        dW1, db1, dW2, db2 = backward_propagation(Z1, A1, Z2, A2, W2, X, Y)
        W1, b1, W2, b2 = update_params (W1, b1, W2, b2, dW1, db1, dW2, db2, alpha)
        if i % 50 == 0:
            print("Iteration:", i)
            print("Accuracy: ", get_accuracy(get_predictions(A2), Y))
    return W1, b1, W2, b2

In [13]:
 grad_descent(train_x,train_y,500,0.1)

Iteration: 0
[5 5 4 ... 7 9 5] [2 5 9 ... 1 9 3]
Accuracy:  0.07673170731707317
Iteration: 50
[2 3 4 ... 1 9 3] [2 5 9 ... 1 9 3]
Accuracy:  0.7894390243902439
Iteration: 100
[2 3 9 ... 1 9 3] [2 5 9 ... 1 9 3]
Accuracy:  0.8420487804878048
Iteration: 150
[2 5 9 ... 1 9 3] [2 5 9 ... 1 9 3]
Accuracy:  0.8672195121951219
Iteration: 200
[2 5 9 ... 1 9 3] [2 5 9 ... 1 9 3]
Accuracy:  0.8822926829268293
Iteration: 250
[2 5 9 ... 1 9 3] [2 5 9 ... 1 9 3]
Accuracy:  0.8915853658536586
Iteration: 300
[2 5 9 ... 1 9 3] [2 5 9 ... 1 9 3]
Accuracy:  0.8975609756097561
Iteration: 350
[2 5 9 ... 1 9 3] [2 5 9 ... 1 9 3]
Accuracy:  0.9026585365853659
Iteration: 400
[2 5 9 ... 1 9 3] [2 5 9 ... 1 9 3]
Accuracy:  0.9060731707317073
Iteration: 450
[2 5 9 ... 1 9 3] [2 5 9 ... 1 9 3]
Accuracy:  0.9090243902439025


(array([[ 0.0464751 , -0.07085879, -0.09692924, ...,  0.01756042,
         -0.01122254, -0.02779046],
        [ 0.0304601 , -0.08291017,  0.01372915, ..., -0.00485847,
         -0.07242171,  0.07259946],
        [-0.02283455,  0.01325537, -0.02471803, ...,  0.05704056,
         -0.02952274, -0.02657919],
        ...,
        [ 0.0310928 ,  0.0242592 , -0.0628171 , ...,  0.01591983,
          0.00496619, -0.00944219],
        [-0.00573453,  0.03192214,  0.03507147, ..., -0.01142345,
          0.00968191,  0.0189707 ],
        [-0.09501597, -0.02647832,  0.00839867, ...,  0.06877984,
         -0.12229293,  0.04969353]]),
 array([[ 0.27053496],
        [-0.08418159],
        [-0.14963464],
        [ 0.16125778],
        [ 0.1894525 ],
        [ 0.20607778],
        [ 0.05050412],
        [ 0.29371219],
        [ 0.06684082],
        [ 0.18133134]]),
 array([[ 0.03949924, -0.56370326,  0.33644933,  0.18654816, -0.62389074,
         -0.59958206,  1.45752685, -0.08515429, -0.79480103, -0.024