In [14]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [15]:
data = pd.read_csv('/content/drive/MyDrive/Project/MNIST/train.csv')
data.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [16]:
data = np.array(data)
m,n = data.shape
np.random.shuffle(data)
x = data[:,1:]/255
y = data[:,0]

x_train = x[:m-8400,:]
y_train = y[:m-8400]

x_test = x[m-8400:,:]
y_test = y[m-8400:]

print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

(33600, 784)
(33600,)
(8400, 784)
(8400,)


In [17]:
def initial_parameters():
    W1 = np.random.rand(10, 784) - 0.5
    b1 = np.random.rand(10, 1) - 0.5
    W2 = np.random.rand(10, 10) - 0.5
    b2 = np.random.rand(10, 1) - 0.5
    return W1, b1, W2, b2

In [18]:
def ReLU(Z):
    return np.maximum(Z, 0)


def softmax(Z):
    A = np.exp(Z) / sum(np.exp(Z))
    return A

In [19]:
def forward_prop(W1, b1, W2, b2, X):
    Z1 = W1.dot(X.T) + b1
    A1 = ReLU(Z1)
    Z2 = W2.dot(A1) + b2
    A2 = softmax(Z2)
    return Z1, A1, Z2, A2


def one_hot(Y):
    classes  = np.max(Y) + 1
    one_hot_Y = np.zeros((Y.size, classes))
    for i in range(Y.size):
        one_hot_Y[i, Y[i]] = 1
    return one_hot_Y

def re_relu(z):
    return z>0

def backward_prop(Z1, A1, Z2, A2, W1, W2, X, Y):
    one_hot_Y = one_hot(Y)
    dZ2 = A2 - one_hot_Y.T
    dW2 = 1 / m * dZ2.dot(A1.T)
    db2 = 1 / m * np.sum(dZ2)
    dZ1 = W2.T.dot(dZ2) *re_relu(Z1)
    dW1 = 1 / m * dZ1.dot(X)
    db1 = 1 / m * np.sum(dZ1)
    return dW1, db1, dW2, db2

In [20]:
def get_accuracy(predictions, Y):
    return np.sum(predictions == Y) / Y.size


def get_predictions(A2):
    return np.argmax(A2, 0)

def Gradient_descent(X, Y, alpha, iterations):
    W1,b1,W2,b2 = initial_parameters()
    for i in range(iterations):
        z1,a1,z2,a2 = forward_prop(W1, b1, W2, b2, X)
        dW1, db1, dW2, db2 = backward_prop(z1, a1, z2, a2, W1, W2, X, Y)
        W1 = W1 - alpha * dW1
        b1 = b1 - alpha * db1
        W2 = W2 - alpha * dW2
        b2 = b2 - alpha * db2
        if i % 10 == 0:
            print("Iteration: ", i)
            predictions = get_predictions(a2)
            print(get_accuracy(predictions, Y))

    return W1,b1,W2,b2

In [21]:
import time

start_time = time.time()

W1, b1, W2, b2 = Gradient_descent(x_train, y_train, 0.10, 500)

end_time = time.time()
time_taken = end_time - start_time
print(f"Total time taken: {time_taken:.4f} seconds")
time_taken = time_taken/500
print(f"Time taken per iteration: {time_taken:.4f} seconds")

Iteration:  0
0.14744047619047618
Iteration:  10
0.19910714285714284
Iteration:  20
0.225
Iteration:  30
0.2604464285714286
Iteration:  40
0.2913392857142857
Iteration:  50
0.31318452380952383
Iteration:  60
0.3326190476190476
Iteration:  70
0.35235119047619046
Iteration:  80
0.3723214285714286
Iteration:  90
0.3960416666666667
Iteration:  100
0.42288690476190477
Iteration:  110
0.4500297619047619
Iteration:  120
0.4795238095238095
Iteration:  130
0.5101785714285715
Iteration:  140
0.5385714285714286
Iteration:  150
0.5663095238095238
Iteration:  160
0.5918452380952381
Iteration:  170
0.6155952380952381
Iteration:  180
0.6350297619047619
Iteration:  190
0.6519047619047619
Iteration:  200
0.6655654761904762
Iteration:  210
0.6788988095238095
Iteration:  220
0.6904166666666667
Iteration:  230
0.70125
Iteration:  240
0.7102678571428571
Iteration:  250
0.7188392857142857
Iteration:  260
0.7277678571428572
Iteration:  270
0.7338095238095238
Iteration:  280
0.7413988095238095
Iteration:  290

In [22]:
z_test_1, a_test_1, z_test_2, a_test_2 = forward_prop(W1, b1, W2, b2, x_test)
predictions = get_predictions(a_test_2)
print(get_accuracy(predictions, y_test))

0.8145238095238095
