In [1]:
import pandas as pd
from scipy.stats import truncnorm
import numpy as np 
import matplotlib.pyplot as plt 
from scipy.special import expit as activation_function

In [2]:
df = pd.read_csv("fashion-mnist_train.csv")
df2 = pd.read_csv("fashion-mnist_test.csv")

In [3]:
data = np.array(df)
np.random.shuffle(data)
n_samples,n_features = data.shape

data_train = data[0:n_samples].T
Y_train = data_train[0]
X_train = data_train[1:n_features]/255
#data_train.shape


In [4]:
data_test = data[0:n_samples].T
Y_test = data_test[0]
X_test = data_test[1:n_features]/255
#X_test
#data_test.shape

In [5]:
#all of these helper functions were taken from the labs 6 and 7

#relu function
@np.vectorize
def ReLU(x):
    return np.maximum(0,x)

#dderivative of Relu
@np.vectorize
def d_ReLU(x):
    return x > 0

#softmax function
def softmax(x):
    return np.exp(x) / np.sum(np.exp(x), axis = 0)

#sigmoid function
@np.vectorize
#def sigmoid(x):
 #   return 1 / (1 + np.e ** -x)

def sigmoid(x):
    if x < 0:
        return np.exp(x)/(1+np.exp(x))
    else:
        return 1/(1+np.exp(-x))
    
#derivative of sigmoid
@np.vectorize
def d_sigmoid(x):
    return x * (1.0 - x)


def one_hot_encode_Y(Y):
    newY = np.zeros ( (Y.size, Y.max() + 1))
    newY[np.arange(Y.size), Y] = 1
    newY = newY.T
    return newY

#this function I created myself for the stopping critera
def difference(arr):
    return [item-arr[i-1] for i, item in enumerate(arr) if i != 0]

In [12]:
class neuralNetwork():
    
    def __init__(self, 
                 no_of_in_nodes, 
                 no_of_out_nodes, 
                 no_of_hidden_nodes,
                 learning_rate):
        self.no_of_in_nodes = no_of_in_nodes
        self.no_of_out_nodes = no_of_out_nodes
        self.no_of_hidden_nodes = no_of_hidden_nodes
        self.learning_rate = learning_rate 
        self.create_weight_matrices()
        self.newY = one_hot_encode_Y(self.no_of_out_nodes)
        
    def create_weight_matrices(self):
        self.w1 = np.random.rand(self.no_of_hidden_nodes, 784) - 0.5 
        self.b1 = np.random.rand(self.no_of_hidden_nodes, 1) - 0.5
        self.w2 = np.random.rand(self.no_of_hidden_nodes, self.no_of_hidden_nodes) - 0.5 
        self.b2 = np.random.rand(self.no_of_hidden_nodes, 1) - 0.5
        self.w3 = np.random.rand(self.no_of_hidden_nodes, self.no_of_hidden_nodes) - 0.5 
        self.b3 = np.random.rand(self.no_of_hidden_nodes, 1) - 0.5
        
        return self.w1, self.b1, self.w2, self.b2, self.w3, self.b3
    
    def get_predictions(self):
        return np.argmax(self.a2, 0)

    def get_accuracy(self,predictions):
        return (np.sum(predictions == self.no_of_out_nodes) / self.no_of_out_nodes.size) * 100
        
    def SGDTrain(self, epochs):
        self.create_weight_matrices()
        acc_list = []
        for i in range(epochs):
            
            #forward pass with both
            self.z1 = self.w1.dot(self.no_of_in_nodes) + self.b1
            self.a1 = ReLU(self.z1)
            self.z2 = self.w2.dot(self.a1) + self.b2
            self.a2 = sigmoid(self.z2)
            self.z3 = self.w3.dot(self.a2) + self.b3
            self.a3 = softmax(self.z3)
            
            #backwards pass with both
            scaling = 1/self.no_of_out_nodes.size    
            self.z3_delta = self.a3 - self.newY
            self.w3_delta = scaling * self.z3_delta.dot(self.a2.T)
            self.b3_delta = scaling * np.sum(self.z3_delta)
            self.z2_delta = self.w3.T.dot(self.z3_delta) * d_ReLU(self.z2)
            self.w2_delta = scaling * self.z2_delta.dot(self.a1.T)
            self.b2_delta = scaling * np.sum(self.z2_delta)
            self.z1_delta = self.w2.T.dot(self.z2_delta) * d_sigmoid(self.z1)
            self.w1_delta = scaling * self.z1_delta.dot(self.no_of_in_nodes.T)
            self.b1_delta = scaling * np.sum(self.z1_delta)
            
            
            #update weights
            self.w1 -= self.learning_rate * self.w1_delta
            self.b1 -= self.learning_rate * self.b1_delta
            self.w2 -= self.learning_rate * self.w2_delta
            self.b2 -= self.learning_rate * self.b2_delta
            self.w3 -= self.learning_rate * self.w3_delta
            self.b3 -= self.learning_rate * self.b3_delta
            
            
            print(f'epoch: {i+1}') 
            print(f'Accuracy: {(self.get_accuracy(self.get_predictions ())):.2f}%')
            acc_list.append(self.get_accuracy(self.get_predictions ()))
            
            #stopping criteria so if accuracy increase 2 times in a row is less than 0.2 it stops training
            diff = difference(acc_list[-3:])
            if len(diff) > 1:
                if(diff[-1] < 0.2 and diff[-2] < 0.2):
                    break
                #debugging my stopping criteria
              #  print(diff[-1])
        return acc_list


In [14]:
model = neuralNetwork(X_train,Y_train, 10 ,0.1)
acc = model.SGDTrain(100)

epoch: 1
Accuracy: 12.08%
epoch: 2
Accuracy: 8.58%
epoch: 3
Accuracy: 10.01%
epoch: 4
Accuracy: 10.00%
epoch: 5
Accuracy: 10.00%
