In [154]:
import numpy as np
import pandas as pd

In [155]:
file = '/home/peppo/Documents/Machine-Learning/train.csv'

data = pd.read_csv(file)
data = np.array(data)

m, n = data.shape

In [156]:
np.random.shuffle(data)

In [157]:
data_dev = data[0:1000].T
Y_dev = data_dev[0]
X_dev = data_dev[1:n]
X_dev = X_dev / 255.

data_train = data[1000:m].T
Y_train = data_train[0]
X_train = data_train[1:n]
X_train = X_train / 255.

In [158]:
def init_param():
    W1 = np.random.randn(10,784)
    b1 = np.random.randn(10,1)

    W2 = np.random.randn(10,10)
    b2 = np.random.randn(10,1)

    return W1,b1,W2,b2   

In [159]:
W1,b1,W2,b2 = init_param()

<h2>Forward Propagation

In [160]:
def ReLu(Z):
    return np.maximum(Z,0)

In [161]:
def softmax(Z):
    return np.exp(Z) / sum(np.exp(Z))

In [162]:
def forward_prop(W1,b1,W2,b2,X):
    
    Z1 = W1.dot(X)+b1
    A1 = ReLu(Z1)
    Z2 = W2.dot(A1)+b2
    A2 = softmax(Z2)

    return Z1,A1,Z2,A2

<h2>Backward Prop

In [163]:
def one_hot(Y):
    
    one_hot_Y = np.zeros((Y.size,Y.max()+1))
    one_hot_Y[np.arange(Y.size),Y] = 1
    one_hot_Y = one_hot_Y.T

    return one_hot_Y

In [164]:
def der_ReLu(Z):
    return Z > 0

In [165]:
def backward_prop(Z1,A1,Z2,A2,X,Y):
    
    m = Y.size

    one_hot_Y = one_hot(Y)

    dZ2 = A2 - one_hot_Y
    dW2 = (1/m) * dZ2.dot(A1.T)
    db2 = (1/m) * np.sum(dZ2)
    
    dZ1 = W2.T.dot(dZ2) * der_ReLu(Z1)
    dW1 = (1/m) * dZ1.dot(X.T)
    db1 = (1/m) * np.sum(dZ1)

    return dW1,db1,dW2,db2

In [166]:
def update_params(W1,b1,W2,b2,dW1,db1,dW2,db2,alpha):
    
    W1 -= alpha*dW1
    W2 -= alpha*dW2
    b1 -= alpha*db1
    b2 -= alpha*db2

    return W1,b1,W2,b2

In [167]:
def prediction(A2):
    return np.argmax(A2,0)

def accuracy(prediction,Y):
    print(prediction,Y)
    return np.sum(prediction == Y)/Y.size

In [168]:
def gradient_descent(X,Y,interations,alpha):
    
    W1,b1,W2,b2 = init_param()
    for i in range(interations):
        
        Z1, A1, Z2, A2 = forward_prop(W1,b1,W2,b2,X)
        dW1,db1,dW2,db2 = backward_prop(Z1, A1, Z2, A2,X,Y)
        W1,b1,W2,b2 = update_params(W1,b1,W2,b2,dW1,db1,dW2,db2,alpha)

        if i % 50 == 0:

            print("Number of iterations:\t",i)
            print("Accuracy:\t",accuracy(prediction(A2),Y))
        
    return W1,b1,W2,b2

In [169]:
W1,b1,W2,b2 = gradient_descent(X_train,Y_train,10000,.5)

Number of iterations:	 0
[7 7 3 ... 4 7 3] [5 3 9 ... 1 9 4]
Accuracy:	 0.09414634146341463
Number of iterations:	 50
[3 3 4 ... 6 4 4] [5 3 9 ... 1 9 4]
Accuracy:	 0.46726829268292686
Number of iterations:	 100
[9 6 9 ... 1 9 4] [5 3 9 ... 1 9 4]
Accuracy:	 0.5956829268292683
Number of iterations:	 150
[3 6 9 ... 6 9 4] [5 3 9 ... 1 9 4]
Accuracy:	 0.6559268292682927
Number of iterations:	 200
[3 6 8 ... 1 7 4] [5 3 9 ... 1 9 4]
Accuracy:	 0.6319024390243903
Number of iterations:	 250
[9 6 9 ... 1 9 4] [5 3 9 ... 1 9 4]
Accuracy:	 0.660829268292683
Number of iterations:	 300
[3 6 9 ... 1 9 4] [5 3 9 ... 1 9 4]
Accuracy:	 0.7118292682926829
Number of iterations:	 350
[5 6 7 ... 1 7 4] [5 3 9 ... 1 9 4]
Accuracy:	 0.6409268292682927
Number of iterations:	 400
[3 6 9 ... 1 9 4] [5 3 9 ... 1 9 4]
Accuracy:	 0.7298536585365853
Number of iterations:	 450
[3 6 9 ... 1 9 4] [5 3 9 ... 1 9 4]
Accuracy:	 0.7514634146341463
Number of iterations:	 500
[3 3 9 ... 1 9 4] [5 3 9 ... 1 9 4]
Accuracy: