In [2]:
import pandas as pd
from scipy.stats import truncnorm
import numpy as np 
import matplotlib.pyplot as plt 
from scipy.special import expit as activation_function

In [3]:
#reading the dataset
df = pd.read_csv("archive/fashion-mnist_train.csv")
df.head()

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,6,0,0,0,0,0,0,0,5,0,...,0,0,0,30,43,0,0,0,0,0
3,0,0,0,0,1,2,0,0,0,0,...,3,0,0,0,0,1,0,0,0,0
4,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [11]:
#laying out the dataset into a np array to make it easier to manipulate and read
df = np.array(df)
m, n = df.shape
np.random.shuffle(df)

data_train = df[1000:m].T
Y_train = data_train[0] 
#normalising the train data so that I can get the values all from 0-1 
X_train = data_train[1:n]/255.
#print(sum(X_train))

In [5]:
print(data_train[0])

[7 8 6 ... 9 8 4]


In [6]:
X_train[:,0].shape

(784,)

In [10]:
#defining the parameters for the weights 
def init_params():
    w1 = np.random.rand(10, 784) - 0.5 
    b1 = np.random.rand(10, 1) - 0.5
    w2 = np.random.rand(10, 10) - 0.5 
    b2 = np.random.rand(10, 1) - 0.5
    return w1, b1, w2, b2

def ReLU(z):
    return np.maximum(0,z)

def sigmoid(z):
    return 1 / (1 + numpy.exp(-z))

def softmax(z):
    return np.exp(z) / np.sum(np.exp(z), axis = 0)

def sgd():
    vdw, vdb = 0,0

def Adam():
    vdw,sdw,vdb,sdb = 0,0,0,0

def forward_prop(w1, b1, w2, b2, X):
    z1 = w1.dot(X) + b1
    a1 = ReLU(z1)
    z2 = w2.dot(a1) + b2
    a2 = softmax(z2)
    return z1, a1, z2, a2
    
def one_hotY(Y):
    one_hot_Y = np.zeros ( (Y.size, Y.max() + 1))
    one_hot_Y[np.arange(Y.size), Y] = 1
    one_hot_Y = one_hot_Y.T
    return one_hot_Y

one_hot_Y = one_hotY(Y_train)

def one_hotX(X):
    one_hot_X = np.zeros ( (X.size, X.max() + 1))
    one_hot_X[np.arange(X.size), X] = 1
    one_hot_X = one_hot_X.T
    return one_hot_X

def derivative_ReLU(z):
    return z > 0
    
def back_prop(z1, a1, w2, z2, a2, Y, X):
    m = len(Y)
    dz2 = a2 - one_hot_Y
    dw2 = 1/m * dz2.dot(a1.T)
    db2 = 1/m * np.sum(dz2)
    dz1 = w2.T.dot(dz2) * derivative_ReLU(z1)
    dw1 = 1/m * dz1.dot(X.T)
    db1 = 1/m * np.sum(dz1)
    return dw1, db1, dw2, db2

def update_params(w1, b1, w2, b2, dw1, db1, dw2, db2, lr):
    w1 = w1 - lr * dw1
    b1 = b1 - lr * db1
    w2 = w2 - lr * dw2
    b2 = b2 - lr * db2
    return w1, b1, w2, b2
    
def get_predictions (a2):
    return np.argmax(a2, 0)

def get_accuracy(predictions, Y):
    print(predictions, Y)
    return np.sum(predictions == Y) / Y.size

def gradient_descent(X,Y, iterations, alpha):
    w1, b1, w2, b2 = init_params()
    for i in range (iterations):
        z1, a1, z2, a2 = forward_prop(w1, b1, w2, b2, X)
        dw1, db1, dw2, db2 = back_prop (z1, a1, w2, z2, a2, Y, X)
        w1, b1, w2, b2 = update_params (w1, b1, w2, b2, dw1, db1, dw2, db2, alpha)
        print("Iteration: ", i) 
        print("Accuracy: ", get_accuracy(get_predictions (a2), Y))  
    return w1,b1,w2,b2

w1, b1, w2, b2 = gradient_descent(X_train,Y_train , 100, 0.1)    

Iteration:  0
[7 1 0 ... 4 1 4] [7 8 6 ... 9 8 4]
Accuracy:  0.1954915254237288
Iteration:  1
[7 1 0 ... 7 1 0] [7 8 6 ... 9 8 4]
Accuracy:  0.20728813559322035
Iteration:  2
[7 3 0 ... 7 1 4] [7 8 6 ... 9 8 4]
Accuracy:  0.21616949152542372
Iteration:  3
[7 3 0 ... 7 1 8] [7 8 6 ... 9 8 4]
Accuracy:  0.2217457627118644
Iteration:  4
[7 3 3 ... 7 1 8] [7 8 6 ... 9 8 4]
Accuracy:  0.22932203389830508
Iteration:  5
[7 3 3 ... 7 1 8] [7 8 6 ... 9 8 4]
Accuracy:  0.23783050847457626
Iteration:  6
[7 3 3 ... 7 1 8] [7 8 6 ... 9 8 4]
Accuracy:  0.24649152542372882
Iteration:  7
[7 3 8 ... 7 1 8] [7 8 6 ... 9 8 4]
Accuracy:  0.25572881355932203
Iteration:  8
[7 3 8 ... 7 1 3] [7 8 6 ... 9 8 4]
Accuracy:  0.2647118644067797
Iteration:  9
[7 3 8 ... 7 1 3] [7 8 6 ... 9 8 4]
Accuracy:  0.2729830508474576
Iteration:  10
[7 3 8 ... 7 1 3] [7 8 6 ... 9 8 4]
Accuracy:  0.2803728813559322
Iteration:  11
[7 3 8 ... 7 1 3] [7 8 6 ... 9 8 4]
Accuracy:  0.2873728813559322
Iteration:  12
[7 3 8 ... 7 1 3]