In [1]:
import pandas as pd
from scipy.stats import truncnorm
import numpy as np 
import matplotlib.pyplot as plt 
from scipy.special import expit as activation_function

In [2]:
# Load the Fashion-MNIST training CSV from the working directory (relative path)
# and preview the first rows: a `label` column followed by pixel1..pixel784.
df = pd.read_csv("fashion-mnist_train.csv")
df.head()

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,6,0,0,0,0,0,0,0,5,0,...,0,0,0,30,43,0,0,0,0,0
3,0,0,0,0,1,2,0,0,0,0,...,3,0,0,0,0,1,0,0,0,0
4,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [32]:
# Turn the frame into a plain array; m = number of rows (images), n = number of columns.
data = np.array(df)
m, n = data.shape

# Transpose so every column is one image: row 0 holds the labels and the
# remaining rows hold the pixel values, scaled by 1/255.
data_train = data.T
Y_train, X_train = data_train[0], data_train[1:n] / 255
data_train.shape

(785, 60000)

In [10]:
# Sanity check: features are laid out as (pixels, images) after the transpose above.
X_train.shape

(784, 60000)

In [11]:
# Sanity check: labels are a flat vector with one entry per image.
Y_train.shape

(60000,)

In [41]:
# Random starting weights and biases for the two-layer network.
def init_params():
    """Draw initial parameters uniformly from [-0.5, 0.5).

    Returns:
        (w1, b1, w2, b2) with shapes (10, 784), (10, 1), (10, 10), (10, 1).
    """
    # Drawn in the order w1, b1, w2, b2 so a seeded run is reproducible.
    layer_shapes = ((10, 784), (10, 1), (10, 10), (10, 1))
    w1, b1, w2, b2 = [np.random.rand(*shape) - 0.5 for shape in layer_shapes]
    return w1, b1, w2, b2

def ReLU(z):
    """Rectified linear unit: element-wise maximum of `z` and zero."""
    rectified = np.maximum(z, 0)
    return rectified

def softmax(z):
    """Column-wise softmax of `z`.

    Each column (axis 0) is treated as one vector of logits and is mapped to
    probabilities that sum to 1.

    Args:
        z: array of logits; normalisation runs along axis 0.

    Returns:
        Array of the same shape as `z` holding the probabilities.
    """
    # Subtracting the per-column maximum leaves the result mathematically
    # unchanged but keeps np.exp from overflowing to inf (which would turn
    # the output into NaN) when the logits are large.
    shifted = z - np.max(z, axis=0, keepdims=True)
    exp_z = np.exp(shifted)  # computed once and reused for the normaliser
    return exp_z / np.sum(exp_z, axis=0, keepdims=True)

def forward_pass(w1, b1, w2, b2, X):
    """Run the two-layer network forward on `X`.

    Hidden layer: affine transform followed by ReLU.
    Output layer: affine transform followed by softmax.

    Returns:
        (z1, a1, z2, a2): pre-activation and activation of each layer, in order.
    """
    hidden_pre = np.dot(w1, X) + b1
    hidden_act = ReLU(hidden_pre)
    output_pre = np.dot(w2, hidden_act) + b2
    output_act = softmax(output_pre)
    return hidden_pre, hidden_act, output_pre, output_act
    
def one_hotY(Y, num_classes=None):
    """One-hot encode integer class labels, one column per sample.

    Args:
        Y: 1-D array of non-negative integer labels.
        num_classes: number of rows (classes) in the result. When omitted it
            is inferred as ``Y.max() + 1``; pass it explicitly when `Y` may
            not contain the highest class (e.g. a small batch), so the result
            always lines up with the network output.

    Returns:
        Float array of shape (num_classes, Y.size) with a single 1 per column.

    Raises:
        ValueError: if `Y` is empty and `num_classes` is not given.
    """
    Y = np.asarray(Y)
    if num_classes is None:
        if Y.size == 0:
            raise ValueError("cannot infer num_classes from an empty label array")
        num_classes = int(Y.max()) + 1
    one_hot_Y = np.zeros((Y.size, num_classes))
    # Row i gets a 1 in the column given by label i; transposing afterwards
    # yields the (classes, samples) layout.
    one_hot_Y[np.arange(Y.size), Y] = 1
    return one_hot_Y.T

def one_hotX(X):
    """One-hot encode an integer array, one column per element.

    Returns a float array of shape (X.max() + 1, X.size) whose column i has a
    single 1 in row X[i].
    """
    n_items = X.size
    n_levels = X.max() + 1
    encoded = np.zeros((n_items, n_levels))
    item_idx = np.arange(n_items)
    encoded[item_idx, X] = 1
    return encoded.T

def derivative_ReLU(z):
    """Element-wise ReLU slope as a boolean mask: True where `z` is positive."""
    is_positive = 0 < z
    return is_positive
    
def backwards_propogation(z1, a1, w2, z2, a2, Y, X):
    """Compute the parameter gradients for one forward pass.

    Args:
        z1, a1: hidden-layer pre-activation and activation from the forward pass.
        w2: output-layer weights.
        z2: output-layer pre-activation (unused; kept for call symmetry).
        a2: output-layer softmax activation.
        Y: integer class labels, one per sample.
        X: input features with one column per sample.

    Returns:
        (dw1, db1, dw2, db2): gradients averaged over the `Y.size` samples.
        The bias gradients are column vectors with one entry per unit.
    """
    m = Y.size
    one_hot_Y = one_hotY(Y)
    dz2 = a2 - one_hot_Y
    dw2 = 1/m * dz2.dot(a1.T)
    # Sum over the sample axis only. Summing over every element collapses the
    # gradient to one scalar shared by all units -- and for the output layer
    # that scalar is ~0, because each softmax column and each one-hot column
    # sums to 1, so the output bias would never be trained.
    db2 = 1/m * np.sum(dz2, axis=1, keepdims=True)
    dz1 = w2.T.dot(dz2) * derivative_ReLU(z1)
    dw1 = 1/m * dz1.dot(X.T)
    db1 = 1/m * np.sum(dz1, axis=1, keepdims=True)
    return dw1, db1, dw2, db2

def update_params(w1, b1, w2, b2, dw1, db1, dw2, db2, alpha):
    """Take one gradient-descent step of size `alpha` on every parameter.

    Returns:
        (w1, b1, w2, b2) after subtracting `alpha` times the matching gradient.
    """
    def step(param, grad):
        return param - alpha * grad

    return step(w1, dw1), step(b1, db1), step(w2, dw2), step(b2, db2)

def get_predictions(a2):
    """Pick, for every column of `a2`, the row index with the largest value."""
    predicted_classes = np.argmax(a2, axis=0)
    return predicted_classes

def get_accuracy(predictions, Y):
    """Return the fraction of `predictions` equal to the labels `Y`.

    Also prints both arrays so they can be compared by eye.
    """
    print(predictions, Y)
    n_correct = np.sum(predictions == Y)
    return n_correct / Y.size

def train(input_vector, target_vector, epochs=20, alpha=0.1):
    """Train the two-layer network with full-batch gradient descent.

    Args:
        input_vector: input features with one column per sample.
        target_vector: integer class labels, one per sample.
        epochs: number of full passes over the data.
        alpha: learning rate used for every parameter update.

    Returns:
        (w1, b1, w2, b2): the parameters after the final epoch.
    """
    w1, b1, w2, b2 = init_params()
    for i in range(epochs):
        z1, a1, z2, a2 = forward_pass(w1, b1, w2, b2, input_vector)
        dw1, db1, dw2, db2 = backwards_propogation(z1, a1, w2, z2, a2, target_vector, input_vector)
        # Apply the gradients; without this step the parameters never change.
        w1, b1, w2, b2 = update_params(w1, b1, w2, b2, dw1, db1, dw2, db2, alpha)
        print("Iteration: ", i)
        # Score against the labels passed in, not a notebook-level global.
        print("Accuracy: ", get_accuracy(get_predictions(a2), target_vector))
    return w1, b1, w2, b2
            
    


def gradient_descent(X, Y, iterations, alpha):
    """Fit the two-layer network with full-batch gradient descent.

    Args:
        X: input features with one column per sample.
        Y: integer class labels, one per sample.
        iterations: number of gradient steps to take.
        alpha: learning rate.

    Returns:
        (w1, b1, w2, b2): the parameters after the final step.
    """
    w1, b1, w2, b2 = init_params()
    for i in range(iterations):
        # Use the helpers defined in this notebook; the previous names
        # (forward_prop / back_prop) only existed as leftover kernel state.
        z1, a1, z2, a2 = forward_pass(w1, b1, w2, b2, X)
        dw1, db1, dw2, db2 = backwards_propogation(z1, a1, w2, z2, a2, Y, X)
        w1, b1, w2, b2 = update_params(w1, b1, w2, b2, dw1, db1, dw2, db2, alpha)
        print("Iteration: ", i)
        # Accuracy is measured on the activations computed before this step's update.
        print("Accuracy: ", get_accuracy(get_predictions(a2), Y))
    return w1, b1, w2, b2

# Fit on the full training set: 100 iterations at learning rate 0.1.
w1, b1, w2, b2 = gradient_descent(X_train,Y_train , 100, 0.1)    
    
    
    

Iteration:  0
[2 2 8 ... 7 2 5] [2 9 6 ... 8 8 7]
Accuracy:  0.10231666666666667
Iteration:  1
[2 2 2 ... 7 2 5] [2 9 6 ... 8 8 7]
Accuracy:  0.17558333333333334
Iteration:  2
[2 7 2 ... 7 2 5] [2 9 6 ... 8 8 7]
Accuracy:  0.22553333333333334
Iteration:  3
[2 7 1 ... 7 2 5] [2 9 6 ... 8 8 7]
Accuracy:  0.2629
Iteration:  4
[2 7 1 ... 7 2 5] [2 9 6 ... 8 8 7]
Accuracy:  0.29225
Iteration:  5
[2 7 1 ... 7 3 5] [2 9 6 ... 8 8 7]
Accuracy:  0.31688333333333335
Iteration:  6
[2 7 1 ... 7 3 5] [2 9 6 ... 8 8 7]
Accuracy:  0.33935
Iteration:  7
[2 7 1 ... 7 3 5] [2 9 6 ... 8 8 7]
Accuracy:  0.3606333333333333
Iteration:  8
[2 7 2 ... 7 3 5] [2 9 6 ... 8 8 7]
Accuracy:  0.37755
Iteration:  9
[2 7 2 ... 7 3 5] [2 9 6 ... 8 8 7]
Accuracy:  0.39386666666666664
Iteration:  10
[2 7 2 ... 7 3 5] [2 9 6 ... 8 8 7]
Accuracy:  0.41078333333333333
Iteration:  11
[2 7 2 ... 7 3 5] [2 9 6 ... 8 8 7]
Accuracy:  0.4241
Iteration:  12
[2 7 4 ... 7 3 5] [2 9 6 ... 8 8 7]
Accuracy:  0.43115
Iteration:  13
[2 7