In [3]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

In [5]:
data = pd.read_csv('digit-recognizer/train.csv')

In [6]:
data.head()


Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [7]:
data = np.array(data)

m,n = data.shape
np.random.shuffle(data)

data_dev = data[0:1000].T
y_dev = data_dev[0]
x_dev = data_dev[1:1000]

data_train = data[1000:m].T
y_train = data_train[0]
x_train = data_train[1:n]

x_train = x_train / 255



In [8]:
class Hidden_Layer:
    def __init__(self, n_neurons, n_inputs):
        self.weights = 0.10 * np.random.randn(n_neurons, n_inputs)
        self.biases = 0.10 * np.random.randn(n_neurons, 1)
        
#                     (784,n_samples)
Layer1 = Hidden_Layer(50,784)
Layer2 = Hidden_Layer(30,50)
Layer3 = Hidden_Layer(10,30)

def init_parameters(Layer1, Layer2, Layer3):
    b1 = Layer1.biases
    W1 = Layer1.weights
    W2 = Layer2.weights
    b2 = Layer2.biases
    W3 = Layer3.weights
    b3 = Layer3.biases

    return W1, b1, W2, b2, W3, b3

def Activation_ReLU(Z):
    return np.maximum(Z,0)
    
def Activation_softmax(Z):
    exp_values = np.exp(Z - np.max(Z, axis=0, keepdims=True))
    return exp_values / np.sum(exp_values, axis=0, keepdims=True)

def forward_propagation(W1, b1, W2, b2, W3, b3, X):
    Z1 = W1.dot(X) + b1
    A1 = Activation_ReLU(Z1)
    Z2 = W2.dot(A1) + b2
    A2 = Activation_ReLU(Z2)
    Z3 = W3.dot(A2) + b3
    A3 = Activation_softmax(Z3)

    return  Z1, A1, Z2, A2, Z3, A3

def deriv_ReLU(Z):
    return Z > 0
    
def one_hot_encode(Y):
    one_hot_y = np.zeros((Y.size, Y.max() + 1))
    one_hot_y[np.arange(Y.size), Y] = 1
    return one_hot_y.T

def backward_propagation(Z1, A1, Z2, A2, Z3, A3, W1, W2, W3, X, Y):
    one_hot_y = one_hot_encode(Y)
    
    dZ3 = A3 - one_hot_y
    dW3 = (1 / m) * dZ3.dot(A2.T)
    db3 = (1 / m) * np.sum(dZ3)
    dZ2 = W3.T.dot(dZ3) * deriv_ReLU(Z2)    
    dW2 = (1 / m) * dZ2.dot(A1.T)
    db2 = (1 / m) * np.sum(dZ2)
    dZ1 = W2.T.dot(dZ2) * deriv_ReLU(Z1)
    dW1 = (1 / m) * dZ1.dot(X.T)
    db1 = (1 / m) * np.sum(dZ1)

    return dW1, db1, dW2, db2, dW3, db3

def update_parameters(W1, b1, W2, b2, W3, b3, dW1, db1, dW2, db2, dW3, db3, alpha):
    W1 = W1 - alpha * dW1
    b1 = b1 - alpha * db1    
    W2 = W2 - alpha * dW2  
    b2 = b2 - alpha * db2  
    W3 = W3 - alpha * dW3
    b3 = b3 - alpha * db3

    return W1, b1, W2, b2, W3, b3
    
        


In [9]:
def get_predictions(A3):
    return np.argmax(A3, 0)

def get_accuracy(predictions, Y):
    print(predictions, Y)
    return np.sum(predictions == Y) / Y.size

def gradient_descent(X, Y, alpha, iterations):
    W1, b1, W2, b2, W3, b3 = init_parameters(Layer1, Layer2, Layer3)

    for i in range(iterations):
        Z1, A1, Z2, A2, Z3, A3 = forward_propagation(W1, b1, W2, b2, W3, b3, X)
        dW1, db1, dW2, db2, dW3, db3 = backward_propagation(Z1, A1, Z2, A2, Z3, A3, W1, W2, W3, X, Y)
        W1, b1, W2, b2, W3, b3 = update_parameters(W1, b1, W2, b2, W3, b3, dW1, db1, dW2, db2, dW3, db3, alpha)

        if i% 10 == 0:
            print("Iteration: ", i)
            predictions = get_predictions(A3)
            print(get_accuracy(predictions, Y))
    return W1, b1, W2, b2, W3, b3

In [10]:
W1, b1, W2, b2, W3, b3 = gradient_descent(x_train, y_train, 0.10, 300)

Iteration:  0
[8 8 8 ... 4 8 8] [9 3 8 ... 2 3 5]
0.08636585365853658
Iteration:  10
[8 8 8 ... 8 5 0] [9 3 8 ... 2 3 5]
0.15121951219512195
Iteration:  20
[8 0 0 ... 0 5 0] [9 3 8 ... 2 3 5]
0.2642439024390244
Iteration:  30
[7 0 2 ... 2 5 0] [9 3 8 ... 2 3 5]
0.3515609756097561
Iteration:  40
[7 0 2 ... 2 5 0] [9 3 8 ... 2 3 5]
0.44441463414634147
Iteration:  50
[7 0 2 ... 2 3 0] [9 3 8 ... 2 3 5]
0.528
Iteration:  60
[7 0 2 ... 2 3 3] [9 3 8 ... 2 3 5]
0.6323658536585366
Iteration:  70
[7 0 9 ... 2 3 5] [9 3 8 ... 2 3 5]
0.7018292682926829
Iteration:  80
[7 0 9 ... 2 3 5] [9 3 8 ... 2 3 5]
0.7428780487804878
Iteration:  90
[7 0 9 ... 2 3 5] [9 3 8 ... 2 3 5]
0.7717560975609756
Iteration:  100
[7 0 9 ... 2 3 5] [9 3 8 ... 2 3 5]
0.792609756097561
Iteration:  110
[7 0 9 ... 2 3 5] [9 3 8 ... 2 3 5]
0.8082926829268293
Iteration:  120
[9 3 9 ... 2 3 5] [9 3 8 ... 2 3 5]
0.8217560975609756
Iteration:  130
[9 3 8 ... 2 3 5] [9 3 8 ... 2 3 5]
0.8324390243902439
Iteration:  140
[9 3 8 ... 2