# Importing and Creating Data 


In [462]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Configuration a reader may want to tune.
SEED = 42
DATA_DIR = '../03_1_Neural Network from Scratch'

# Seed NumPy's global RNG so the shuffles below (and any later np.random
# draws, e.g. weight initialisation) are reproducible on Restart & Run All.
np.random.seed(SEED)

# np.array(...) copies the frame's values, so the in-place shuffles below
# operate on writable arrays owned by this notebook.
training_data = np.array(pd.read_csv(f'{DATA_DIR}/train.csv')) # 42000,785
testing_data = np.array(pd.read_csv(f'{DATA_DIR}/test.csv'))   # 28000,784

# Shuffle rows (samples) in place.
# NOTE(review): shuffling the test rows discards their original order —
# confirm nothing downstream needs the file's row order.
np.random.shuffle(training_data)
np.random.shuffle(testing_data)

In [463]:
# Capture (rows, columns) of both arrays before the training matrix is transposed.
(m, n), (a, b) = training_data.shape, testing_data.shape

# Re-orient the training matrix so that each column is one sample.
# NOTE: this reassigns `training_data`, so re-running the cell transposes it back.
training_data = np.transpose(training_data)

training_data.shape , testing_data.shape, m , n , a, b

((785, 42000), (28000, 784), 42000, 785, 28000, 784)

In [464]:
# Row 0 of the transposed training matrix is taken as the label vector.
y_train = training_data[0, :]

print(training_data)
print("\n\n",y_train)

[[1 7 3 ... 9 6 1]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]


 [1 7 3 ... 9 6 1]


In [465]:
# Every row after the label row is a feature; divide by 255 to rescale the values.
X_train = training_data[1:] / 255.0
print(X_train)
print("\n\n")

# Samples are stored as COLUMNS (features x samples), so one example is
# X_train[:, i]; X_train[i] would be a single feature across all samples.
SAMPLE_IDX = 33
print(X_train[:, SAMPLE_IDX], " -> ", y_train[SAMPLE_IDX])


[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]



[0. 0. 0. ... 0. 0. 0.]  ->  0


In [466]:
# Re-orient the test matrix to features x samples, like the training data.
# NOTE: this reassigns `testing_data`, so re-running the cell transposes it back.
testing_data = testing_data.T

if testing_data.shape[0] == X_train.shape[0] + 1:
    # One more row than there are features: same layout as train.csv,
    # so row 0 is the label row.
    y_test = testing_data[0]
    X_test = testing_data[1:] / 255.0
else:
    # test.csv is 28000 x 784: it has no label column, so every row is a
    # feature. Slicing off row 0 would drop a pixel and leave 783 features,
    # which no longer matches the 784 inputs the network expects.
    y_test = None
    X_test = testing_data / 255.0

In [467]:
# Shape of the first row of X_test (one feature across all test samples).
np.shape(X_test[0])

(28000,)

# Defining functions ->

In [468]:
def initialize_parameters(n_inputs=784, n_hidden=10, n_outputs=10):
    """Create small random weights and biases for a two-layer network.

    Parameters
    ----------
    n_inputs : int, default 784
        Number of input features.
    n_hidden : int, default 10
        Number of units in the hidden layer.
    n_outputs : int, default 10
        Number of output units.

    Returns
    -------
    tuple
        ``(W1, b1, W2, b2)`` with shapes ``(n_hidden, n_inputs)``,
        ``(n_hidden, 1)``, ``(n_outputs, n_hidden)`` and ``(n_outputs, 1)``.
        Every entry is a standard-normal draw scaled by 0.01.

    Notes
    -----
    Values come from NumPy's global RNG (``np.random.randn``); seed it
    beforehand for reproducible results. Called with no arguments this
    produces the same shapes, in the same draw order, as before.
    """
    scale = 0.01  # keep initial pre-activations small
    W1 = np.random.randn(n_hidden, n_inputs) * scale
    b1 = np.random.randn(n_hidden, 1) * scale
    W2 = np.random.randn(n_outputs, n_hidden) * scale
    b2 = np.random.randn(n_outputs, 1) * scale

    return W1, b1, W2, b2


In [469]:
def normalize(x):
    """Standardise ``x`` using its overall mean and standard deviation.

    The mean and standard deviation are computed over all elements of ``x``
    (no axis is given), so a single scalar shift and scale is applied.

    Parameters
    ----------
    x : array_like
        Values to standardise.

    Returns
    -------
    numpy.ndarray
        ``(x - mean) / std``. When the standard deviation is exactly zero
        (all values identical) the centred values (all zeros) are returned
        instead of dividing by zero.
    """
    mean = np.mean(x)
    std = np.std(x)
    centered = x - mean
    if std == 0:
        # Constant input: dividing would yield NaN/inf, so return the zeros.
        return centered
    return centered / std

In [470]:
def relu(x):
    """Element-wise rectifier: the larger of 0 and each entry of ``x``."""
    rectified = np.maximum(0, x)
    return rectified
    

In [471]:
def ReLU_deriv(x):
    """Return the mask ``x > 0``: True where ``x`` is strictly positive."""
    positive_mask = x > 0
    return positive_mask

In [472]:
def softmax(Z):
    """Softmax along axis 0, computed without modifying ``Z``.

    Parameters
    ----------
    Z : numpy.ndarray
        Scores; the normalisation runs along axis 0, so for a 2-D input each
        column is turned into a distribution.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as ``Z`` whose entries along axis 0 sum to 1.
    """
    # Subtract the per-column max for numerical stability. This builds a new
    # array rather than using ``Z -= ...``, which would overwrite the
    # caller's array.
    shifted = Z - np.max(Z, axis=0, keepdims=True)
    exp_shifted = np.exp(shifted)  # computed once and reused below
    return exp_shifted / np.sum(exp_shifted, axis=0, keepdims=True)

    


In [473]:
def one_hot_encoder(Y, num_classes=None):
    """One-hot encode integer labels, one column per sample.

    Parameters
    ----------
    Y : array_like of int
        1-D vector of class indices.
    num_classes : int, optional
        Number of rows in the result. Defaults to ``Y.max() + 1``. Pass it
        explicitly when ``Y`` may not contain the highest class (for example
        a small batch), so the result keeps a fixed number of rows.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(num_classes, Y.size)`` with a single 1 in
        each column, at the row given by that sample's label.
    """
    Y = np.asarray(Y)
    if num_classes is None:
        num_classes = int(Y.max()) + 1
    one_hot_Y = np.zeros((num_classes, Y.size))
    # Row = class index, column = sample index.
    one_hot_Y[Y, np.arange(Y.size)] = 1
    return one_hot_Y

In [474]:
def forward_propagation(w1,b1,w2,b2,X):
    """Run one forward pass through the two-layer network.

    The hidden layer is ``relu(w1 . X + b1)`` and the output layer is
    ``softmax(w2 . hidden + b2)``.

    Returns
    -------
    tuple
        ``(Z1, A1, Z2, A2)``: the hidden pre-activation, hidden activation,
        output pre-activation and output activation, in that order.
    """
    # Hidden layer: affine map followed by ReLU.
    hidden_pre = np.dot(w1, X) + b1
    hidden_act = relu(hidden_pre)

    # Output layer: affine map followed by softmax.
    output_pre = np.dot(w2, hidden_act) + b2
    output_act = softmax(output_pre)

    return hidden_pre, hidden_act, output_pre, output_act

In [475]:
def backward_propagation(Z1, A1, Z2, A2, w1, w2, X, Y):
    """Compute parameter gradients for the two-layer network.

    Parameters
    ----------
    Z1, A1, Z2, A2 : numpy.ndarray
        Values returned by ``forward_propagation``. ``Z2`` is accepted for
        symmetry but is not used here.
    w1, w2 : numpy.ndarray
        Layer weights. Only ``w2`` is used; ``w1`` is accepted for symmetry.
    X : numpy.ndarray
        Inputs, one column per sample.
    Y : numpy.ndarray
        Integer labels, one per sample.

    Returns
    -------
    tuple
        ``(dW1, db1, dW2, db2)``; the bias gradients are column vectors.

    Notes
    -----
    Gradients are averaged over the number of samples in ``Y`` rather than
    over a notebook-level global, so the function gives correctly scaled
    results for any batch it is handed.
    """
    n_samples = Y.size
    one_hot_Y = one_hot_encoder(Y)

    # Output-layer error. Assuming A2 is a softmax output trained with
    # cross-entropy loss, this is the gradient w.r.t. Z2.
    dZ2 = A2 - one_hot_Y
    dW2 = dZ2.dot(A1.T) / n_samples
    db2 = np.sum(dZ2, axis=1, keepdims=True) / n_samples

    # Propagate the error through w2 and gate it with the ReLU derivative.
    dZ1 = w2.T.dot(dZ2) * ReLU_deriv(Z1)
    dW1 = dZ1.dot(X.T) / n_samples
    db1 = np.sum(dZ1, axis=1, keepdims=True) / n_samples

    return dW1, db1, dW2, db2


In [476]:
def update_paramameters(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha):
    """Apply one gradient-descent step to the parameters, in place.

    Parameters
    ----------
    W1, b1, W2, b2 : numpy.ndarray
        Current parameters. They are modified in place and also returned.
    dW1, db1, dW2, db2 : numpy.ndarray
        Gradients for the corresponding parameters. Each bias gradient is
        reshaped to its bias's shape, so a flat vector or a column vector
        both work, for any layer width.
    alpha : float
        Learning rate.

    Returns
    -------
    tuple
        ``(W1, b1, W2, b2)`` after the update (the same array objects).
    """
    W1 -= alpha * dW1
    # Reshape to the bias's own shape instead of a hard-coded (10, 1).
    b1 -= alpha * np.reshape(db1, b1.shape)
    W2 -= alpha * dW2
    b2 -= alpha * np.reshape(db2, b2.shape)
    return W1, b1, W2, b2

In [477]:
def get_predictions(A2):
    """Return, for each column of ``A2``, the row index of its largest entry."""
    predicted_labels = np.argmax(A2, axis=0)
    return predicted_labels

In [478]:
def get_accuracy(predictions, Y):
    """Print both label vectors, then return the fraction that match.

    The result is the number of positions where ``predictions == Y``
    divided by ``Y.size``.
    """
    print(predictions, Y)
    n_correct = np.sum(predictions == Y)
    return n_correct / Y.size

In [479]:
def gradient_descent(X, Y, alpha, iterations):
    """Train the network with full-batch gradient descent.

    Starts from ``initialize_parameters()`` and repeats forward pass,
    backward pass and parameter update ``iterations`` times with learning
    rate ``alpha``. On every 100th iteration (including iteration 0) it
    prints the iteration number and the accuracy of that iteration's
    forward pass, i.e. measured before the update is applied.

    Returns
    -------
    tuple
        The final ``(W1, b1, W2, b2)``.
    """
    params = initialize_parameters()  # (W1, b1, W2, b2)
    for step in range(iterations):
        W1, _, W2, _ = params
        cache = forward_propagation(*params, X)  # (Z1, A1, Z2, A2)
        grads = backward_propagation(*cache, W1, W2, X, Y)  # (dW1, db1, dW2, db2)
        params = update_paramameters(*params, *grads, alpha)
        if step % 100 == 0:
            print("Epoch: ", step)
            # cache[3] is A2, the output activations of this forward pass.
            print(get_accuracy(get_predictions(cache[3]), Y))
    return params

# Training and Testing ->

In [480]:
# Train on the full training set: learning rate 0.1 for 1000 iterations.
W1, b1, W2, b2 = gradient_descent(X_train, y_train, alpha=0.1, iterations=1000)

Epoch:  0
[0 0 9 ... 9 9 0] [1 7 3 ... 9 6 1]
0.05042857142857143
Epoch:  100
[1 7 0 ... 9 2 1] [1 7 3 ... 9 6 1]
0.5414523809523809
Epoch:  200
[1 7 3 ... 9 6 1] [1 7 3 ... 9 6 1]
0.8111428571428572
Epoch:  300
[1 7 3 ... 9 6 1] [1 7 3 ... 9 6 1]
0.8618333333333333
Epoch:  400
[1 7 3 ... 9 6 1] [1 7 3 ... 9 6 1]
0.8843571428571428
Epoch:  500
[1 7 3 ... 9 6 1] [1 7 3 ... 9 6 1]
0.8942857142857142
Epoch:  600
[1 7 3 ... 9 6 1] [1 7 3 ... 9 6 1]
0.8990714285714285
Epoch:  700
[1 7 3 ... 9 6 1] [1 7 3 ... 9 6 1]
0.9032380952380953
Epoch:  800
[1 7 3 ... 9 6 1] [1 7 3 ... 9 6 1]
0.906547619047619
Epoch:  900
[1 7 3 ... 9 6 1] [1 7 3 ... 9 6 1]
0.9094047619047619
