In [8]:
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import keras
from keras.datasets import fashion_mnist

#importing the dataset from keras library
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

# #one data image per label
# fig,ax=plt.subplots(nrows=10,figsize=(15,15))
# for i in range(10):
#     ax[i].set_title("\n class {} image".format(i))
#     ax[i].axis("off")
#     x=x_train[y_train==i]
#     ax[i].imshow(x[0,:,:],cmap="gray")

In [9]:
#normalizing the data between 0-1
x_train=x_train/255
x_train=x_train.astype(float);
x_test=x_test/255
x_test=x_test.astype(float);

#flattening the data points to 1D
x_train=x_train.reshape(60000,784)
x_test=x_test.reshape(10000,784)

In [136]:
class FeedForwardNN:
    def __init__(self,epochs,noOfHL,NeuronsPL,noOfClass,X_train,y_train,x_test,y_test,learningRate,batchSize):
        self.noOfHL=noOfHL
        self.ListOfNeuronsPL=NeuronsPL
        self.noOfClass=noOfClass
        self.x_train=x_train
        self.y_train=y_train
        self.epochs=epochs
        self.learningRate=learningRate
        self.batchSize=batchSize
        #initialise the initial weights matrix which contains noOfHiddenLayers+1  weight matrices
        self.W=self.initialize_weights()
        #initialise the initial biases matrix which contains noOfHiddenLayers+1  biases matrices
        self.b=self.initialize_biases()

    #returns the weight matrix for the initial configuration, we have used 1 indexing for weights
    def initialize_weights(self):
        weight=[0]*(self.noOfHL+2)
        for i in range(self.noOfHL+1):
            if(i==0):
                continue
            if(i==1):
                w=np.random.normal(0, 1, size=(self.ListOfNeuronsPL[i-1],self.x_train.shape[1]))
            else:
                w=np.random.normal(0,1,size=(self.ListOfNeuronsPL[i-1],self.ListOfNeuronsPL[i-2]))
            weight[i]=w
        w=np.random.normal(0,1,size=(self.noOfClass,self.ListOfNeuronsPL[self.noOfHL-1]))
        weight[self.noOfHL+1]=w
        return weight
    #returns the biases matrix for the initial configurtion, we have used 1 indexing for biases
    def initialize_biases(self):
        biases=[0]*(self.noOfHL+2)
        for i in range(self.noOfHL+1):
            if(i==0):
                continue
            else:
                b=np.ones(self.ListOfNeuronsPL[i-1])
            biases[i]=b
        b=np.ones(self.noOfClass)
        biases[self.noOfHL+1]=b
        return biases
    #returns sigmoid value of a variable x
    def sigmoid(self,x):
        return 1 / (1 + np.exp(-x))
    
    #returns softmax value of a variable x
    def softmax(self,x):
        exp_x=np.exp(x)
        sum_e=np.sum(exp_x)
        return exp_x/sum_e
    
    def activationFunc(self,A):
        A=np.array(A)
        H=self.sigmoid(A)
        return H
    
    #calculates and returns the predicted values of y using the output Function
    def outputFunc(self,A):
        yhat=self.softmax(A)
        return yhat
        
    #calculates all the H,A and yhat in the forward propogation of backpropogation
    def forwardPropogation(self,i):
        L=self.noOfHL+1
        k=self.noOfClass
        H=[0]*(L)
        A=[0]*(L+1)
        H[0]=np.array(self.x_train[i])
        for i in range(L-1):
            A[i+1]=self.b[i+1]+np.dot(self.W[i+1],H[i])
            H[i+1]=self.activationFunc(A[i+1])
        A[L]=self.b[L]+np.dot(self.W[L],H[L-1])
        yhat=self.outputFunc(A[L])
        yhat=np.array(yhat)
        #print("yhat:  \n",yhat.shape)
        return  H,A,yhat
    
    #derivative of loss wrt to activation of last layer if output function used is softmax
    def derivative_wrt_outputFunc(self,yhat,y_train,i):
        k=self.noOfClass
        e_y=np.zeros(k)
        e_y[y_train[i]]=1
        return -1*(e_y-yhat)
    def cal_activationFun_grad(self,As):
#         print(type(As))
        g_dash=[]
        for i in As:
            g_dash.append(self.sigmoid(i)*(1-self.sigmoid(i)))
        return g_dash
        
    def backwardPropogation(self,i,Hs,As,yhat,y_train):
        W=self.W
        L=self.noOfHL+1
        weights_grad=[0]*(L+1)
        biases_grad=[0]*(L+1)
        activation_grad=[0]*(L+1)
        preactivation_grad=[0]*(L+1)
        activationFunc_grad=[]
        preactivation_grad[L]=self.derivative_wrt_outputFunc(yhat,y_train,i)
        for k in range(L+1)[::-1]:
            if(i==0):
                continue
            #gradient of loss wrt to weights at layer k
            weights_grad[k]=np.outer(preactivation_grad[k],np.transpose(Hs[k-1]))
#             print("weights_grad wrt loss at level {} is {}".format(k,type(weights_grad[k])))
            
            #gradient of loss wrt to biases at layer k
            biases_grad[k]=preactivation_grad[k]
#             print("baises_grad wrt loss at level {} is {}".format(k,biases_grad[k].shape))
            
            #for the next layer calculating gradient of loss wrt to activation
            activation_grad[k-1]=np.dot(np.transpose(W[k]),preactivation_grad[k])
#             print("activation_grad wrt loss at level {} is {}".format(k,activation_grad[k-1].shape))
            
            #calculate gradient of activation function wrt preactivation of previous layer
            if(k>1):
                activationFunc_grad=self.cal_activationFun_grad(As[k-1])
            
                #for the next layer calculating gradient of loss wrt to preactivation
                preactivation_grad[k-1]=np.multiply(activation_grad[k-1],activationFunc_grad)
            
        return weights_grad,biases_grad
    
    def acc_grad(self,final_grad,f_g):
        L=self.noOfHL+1
        for i in range(L+1):
            if(i==0):
                continue
            final_grad[i]=final_grad[i]+f_g[i]
        return final_grad
    def updateWeights(self,eta,weights_grad):
        L=self.noOfHL+1
        for i in range(L+1):
            if(i==0):
                continue
            self.W[i]=self.W[i]-(eta)*weights_grad[i]
        return
    
    def updateBiases(self,eta,biases_grad):
        L=self.noOfHL+1
        for i in range(L+1):
            if(i==0):
                continue
            self.b[i]=self.b[i]-(eta)*biases_grad[i]
        return
    
    def crossEntropy(self,yhat,i):
        return -1*np.log(yhat[self.y_train[i]])
            
        
    def _sgd(self):
        epochs=self.epochs
        L=self.noOfHL+1
        k=self.noOfClass
        x_train=self.x_train
        y_train=self.y_train
        eta=self.learningRate
        batchSize=self.batchSize
        deltaw=[]
        deltab=[]
        loss=[]
        trainingLoss=[]
        for epoch in range(epochs):
            loss=[]
            for i in range(x_train.shape[0]):
                if(i%batchSize==0):
                    if(i!=0):
                        #update the weights and biases
                        self.updateWeights(eta,deltaw)
                        self.updateBiases(eta,deltab)
                    Hs,As,yhat=self.forwardPropogation(i)
                    w_g,b_g=self.backwardPropogation(i,Hs,As,yhat,y_train)
                    if(i==0):
                        deltaw=w_g
                        deltab=b_g
                    else:
                        deltaw=self.acc_grad(deltaw,w_g)
                        deltab=self.acc_grad(deltab,b_g)
                else:
                    Hs,As,yhat=self.forwardPropogation(i)
                    w_g,b_g=self.backwardPropogation(i,Hs,As,yhat,y_train)
                    deltaw=self.acc_grad(deltaw,w_g)
                    deltab=self.acc_grad(deltab,b_g)
                #append loss for this datapoint
                loss.append(self.crossEntropy(yhat,i))   
            self.updateWeights(eta,deltaw)
            self.updateBiases(eta,deltab)
            trainingLoss.append(np.mean(loss))
            print("The loss after epoch:{} is {}".format(epoch,trainingLoss[epoch]))
            
               
                
                
                
                
            
            
        

In [137]:
NeuronsPL=[16,32]
FNNN=FeedForwardNN(10,2,NeuronsPL,10,x_train,y_train,x_test,y_test,0.0001,50)
FNNN._sgd()

The loss after epoch:0 is 1.600958029669534
The loss after epoch:1 is 1.1118926688941144
The loss after epoch:2 is 1.0192072254654014
The loss after epoch:3 is 0.9668913064847112
The loss after epoch:4 is 0.9783237165441742
The loss after epoch:5 is 0.995941746849247
The loss after epoch:6 is 0.912857240367062
The loss after epoch:7 is 0.9692652967050965


  return 1 / (1 + np.exp(-x))


The loss after epoch:8 is 1.0130265494291895
The loss after epoch:9 is 0.949381156494241
