In [2]:
import pandas as pd
import numpy as np
import visualization
import math

# Neural Network Class

In [3]:
class NeuralNetwork:
    """Fully connected feed-forward network trained by (mini-)batch gradient descent.

    Layer sizes are ``[inputNodes] + hiddenList + [outputNodes]``. Every weight
    matrix has shape ``(nodes_out, nodes_in + 1)``; column 0 multiplies the
    constant 1 that ``append1`` prepends to the input of each layer (the bias).
    The loss is the summed squared error between network output and target.

    Weights are NOT created by the constructor: call ``initialiseWeight()``
    before any forward pass or training.
    """

    def __init__(self, inputNodes, hiddenList, hiddenActivations, outputNodes=1, outputActivation="SIGMOID"):
        """Describe the architecture.

        Args:
            inputNodes: number of input features.
            hiddenList: node count of every hidden layer (list or tuple, may be empty).
            hiddenActivations: activation name per hidden layer, same length as
                ``hiddenList``. "RELU" and "SIGMOID" are recognised; any other
                name is treated as the identity.
            outputNodes: number of output nodes.
            outputActivation: activation name of the output layer.

        Raises:
            ValueError: if ``hiddenList`` and ``hiddenActivations`` differ in length.
        """
        hiddenList = list(hiddenList)
        hiddenActivations = list(hiddenActivations)
        if len(hiddenList) != len(hiddenActivations):
            # A mismatch would silently pair layers with the wrong activation.
            raise ValueError(
                "hiddenList has %d entries but hiddenActivations has %d"
                % (len(hiddenList), len(hiddenActivations))
            )

        self.inputNodes = inputNodes
        self.outputNodes = outputNodes
        # Nodes in each layer (also stores the input and output layer)
        self.hiddenList = [inputNodes] + hiddenList + [outputNodes]
        # Activation per layer (index 0 is the input layer, which has none)
        self.Activations = ["None"] + hiddenActivations + [outputActivation]
        self.weight = []    # one matrix per pair of consecutive layers
        self.os = []        # layer outputs of the last forward pass
        self.net = []       # layer pre-activations of the last forward pass
        self.deltas = []    # per-layer error terms of the last backward pass
        self.loss = 0
        self.trainsteps = 0  # number of batch updates performed so far
        self.epoch = 0       # number of epochs performed so far

    # Initialise weight matrices randomly
    def initialiseWeight(self, scale=2):
        """Draw fresh weights from N(0, 1) * scale and reset the step/epoch counters.

        Uses numpy's global random state (``np.random.randn``); seed it
        beforehand for reproducible weights.
        """
        self.trainsteps = 0
        self.epoch = 0
        self.weight = [
            np.random.randn(self.hiddenList[i], self.hiddenList[i - 1] + 1) * scale
            for i in range(1, len(self.hiddenList))
        ]

    # Applies activation function
    def applyActivation(self, vector, activation="None"):
        """Apply the named activation element-wise and return a new array."""
        if activation == "RELU":
            return vector * (vector >= 0)
        if activation == "SIGMOID":
            # exp(-|v|) lies in (0, 1], so neither branch can overflow,
            # unlike 1/(1+exp(-v)) for very negative v.
            shrink = np.exp(-np.abs(vector))
            return np.where(vector >= 0, 1.0 / (1.0 + shrink), shrink / (1.0 + shrink))
        return 1 * vector

    def append1(self, x):
        """Return ``x`` (2-D) with a column of ones prepended as column 0."""
        instances = x.shape[0]
        bias = np.ones([instances, 1])
        return np.concatenate((bias, x), axis=1)

    # Forward pass a batch through the network
    def forwardPass(self, batchX):
        """Propagate ``batchX`` (instances x inputNodes) and cache every layer.

        Afterwards ``self.os[k]`` / ``self.net[k]`` hold the outputs /
        pre-activations of layer ``k``. All layers except the last carry the
        extra leading column of ones added by ``append1``.

        Raises:
            RuntimeError: if the weights have not been initialised.
        """
        layers = len(self.weight)
        if layers == 0:
            # Without weights the loop below would not run and the "prediction"
            # would just be the bias-augmented input.
            raise RuntimeError("weights are not initialised; call initialiseWeight() first")

        self.os = [self.append1(batchX)]
        self.net = [self.append1(batchX)]

        for i in range(layers):
            netj = np.matmul(self.os[i], self.weight[i].T)
            o = self.applyActivation(netj, self.Activations[i + 1])
            if i == layers - 1:
                # Output layer: no bias column
                self.net.append(netj)
                self.os.append(o)
            else:
                self.net.append(self.append1(netj))
                self.os.append(self.append1(o))

    # Euclidean loss -> if changed, gradient_J_o must be updated as well
    def lossfunction(self, pred, y):
        """Element-wise squared error."""
        return (pred - y) ** 2

    # Function returning loss on batch
    def getLoss(self, batchX, batchY):
        """Run a forward pass and return the squared error summed over all entries."""
        self.forwardPass(batchX)
        return np.sum(self.lossfunction(self.os[-1], batchY))

    # Gradient of o wrt net
    def gradient_o_net(self, o, net, activation="None"):
        """Element-wise derivative of the activation output w.r.t. its input."""
        if activation == "RELU":
            return (net >= 0)
        if activation == "SIGMOID":
            return o * (1 - o)
        # Identity activation: derivative is 1 everywhere
        return np.ones_like(o, dtype=bool)

    def gradient_o_net_layer(self, layerNo):
        """Activation derivative for layer ``layerNo`` using the cached forward pass."""
        return self.gradient_o_net(self.os[layerNo], self.net[layerNo], self.Activations[layerNo])

    # Gradient of J wrt o
    def gradient_J_o(self, o, y):
        """Derivative of the squared-error loss w.r.t. the network output."""
        return 2 * (o - y)

    # Backward pass a batch through the network updating weight matrices
    def backwardPass(self, batchX, batchY, learningRate=0.01):
        """Back-propagate the error of the last forward pass and update the weights.

        ``forwardPass(batchX)`` must have been called immediately before, since
        the cached ``self.os`` / ``self.net`` are used. Each weight matrix is
        moved by ``learningRate / batch_size`` times the summed gradient.
        """
        lastLayer = len(self.os) - 1
        m = float(batchX.shape[0])

        finalDelta = self.gradient_J_o(self.os[lastLayer], batchY) * self.gradient_o_net_layer(lastLayer)
        self.deltas = [finalDelta]

        # Hidden layers, last to first. weight[i] connects layer i to layer i+1,
        # and deltas[0] is always the delta of layer i+1 at this point.
        for i in range(lastLayer - 1, 0, -1):
            propagated = np.matmul(self.deltas[0], self.weight[i]) * self.gradient_o_net_layer(i)
            # Column 0 belongs to the constant bias unit, which has no incoming weights
            self.deltas.insert(0, propagated[:, 1:])

        # All deltas were computed with the old weights; now apply the updates
        for i in range(len(self.weight) - 1, -1, -1):
            del_w = np.matmul(self.deltas[i].T, self.os[i])
            self.weight[i] -= (learningRate / m) * del_w

    # Training the NN
    def train(self, X, Y, learningRate=0.1, batchMode=False, batchSize=100, epsilon=0.0001, epochsToRun=500, logEvery=50, decay=False):
        """Train on ``X`` (instances x features) and ``Y`` (instances x outputs).

        Args:
            learningRate: base step size.
            batchMode: if False the whole data set is one batch and
                ``batchSize`` is ignored.
            batchSize: rows per batch when ``batchMode`` is on (must be >= 1).
            epsilon: stop once the full-data loss changes by less than this
                between two consecutive epochs.
            epochsToRun: maximum number of epochs for this call.
            logEvery: print loss/accuracy whenever the cumulative epoch counter
                is a multiple of this; 0 or None disables logging.
            decay: if True the step size is ``learningRate / sqrt(trainsteps)``.

        ``self.epoch`` and ``self.trainsteps`` keep counting across calls; only
        ``initialiseWeight`` resets them.

        Raises:
            ValueError: if ``batchMode`` is on and ``batchSize`` < 1.
        """
        [instances, attributes] = X.shape
        [_, outputs] = Y.shape

        if(batchMode == False):
            batchSize = instances
        elif batchSize < 1:
            # A non-positive batch size would never advance the batch cursor
            raise ValueError("batchSize must be at least 1, got %r" % (batchSize,))

        lr = learningRate
        Trained = False
        prevLoss = self.getLoss(X, Y)
        epochs = 0

        while(not Trained and epochs < epochsToRun):
            self.epoch += 1
            epochs += 1

            cur = 0
            while(cur < instances):
                uplim = min(cur + batchSize, instances)
                batchX = X[cur:uplim, 0:attributes]
                batchY = Y[cur:uplim, 0:outputs]
                cur = uplim
                self.forwardPass(batchX)
                self.trainsteps += 1
                if(decay):
                    lr = learningRate / math.sqrt(float(self.trainsteps))
                self.backwardPass(batchX, batchY, lr)

            curLoss = self.getLoss(X, Y)

            if(abs(curLoss - prevLoss) < epsilon):
                Trained = True

            prevLoss = curLoss
            if(logEvery and self.epoch % logEvery == 0):
                print ("Epoch:",self.epoch,"Loss:",prevLoss,"Accuracy:",self.getAccuracy(X,Y))

    def predict(self, X):
        """Return the raw output-layer activations for ``X``."""
        self.forwardPass(X)
        return self.os[-1]

    def predict_for_plot(self, X):
        """Same values as ``predict``, sliced over all columns."""
        return self.predict(X)[:, 0:]

    def getAccuracy(self, X, Y):
        """Fraction of output entries whose 0.5-thresholded prediction equals ``Y``.

        With a single output node this is the usual classification accuracy;
        with several outputs it is averaged over every output entry, so the
        result always lies in [0, 1].
        """
        pred = (self.predict(X) >= 0.5).astype(int)
        return np.mean(pred == Y)
        
            
            
        

# Loading Data

In [4]:
# xtrain=pd.read_csv("Dataset/NN/toy_data/toy_trainX.csv",header=None,sep=',').values
# ytrain=pd.read_csv("Dataset/NN/toy_data/toy_trainY.csv",header=None,sep=',').values
# xtest=pd.read_csv("Dataset/NN/toy_data/toy_testX.csv",header=None,sep=',').values
# ytest=pd.read_csv("Dataset/NN/toy_data/toy_testY.csv",header=None,sep=',').values

In [5]:
# Read the train and test CSVs as raw arrays (no header row).
xtrain,xtest=(
    pd.read_csv(csv_path,header=None,sep=',').values
    for csv_path in ("Dataset/NN/mnist_data/MNIST_train.csv","Dataset/NN/mnist_data/MNIST_test.csv")
)
# Column 784 is used as the label; the 784:785 slice keeps it 2-D.
# The target is binary: 1 where the label equals 6, otherwise 0.
ytrain,ytest=((table[:,784:785]==6).astype(int) for table in (xtrain,xtest))
# Columns 0-783 are the features, rescaled by 255 (presumably 8-bit pixel values).
xtrain,xtest=(table[:,0:784]/255.0 for table in (xtrain,xtest))

In [6]:
# print(xtest)

In [7]:
# Sanity-check the array shapes, one per line
print(xtrain.shape,ytrain.shape,xtest.shape,ytest.shape,ytrain[:,0:].shape,sep="\n")

(10000, 784)
(10000, 1)
(3600, 784)
(3600, 1)
(10000, 1)


# Testing NN

In [43]:
# Alternative architectures tried earlier:
# NN_obj=NeuralNetwork(xtrain.shape[1],[5,5],["SIGMOID","SIGMOID"],ytrain.shape[1])
# NN_obj=NeuralNetwork(xtrain.shape[1],[5,5],["RELU","RELU"],ytrain.shape[1])
# NN_obj=NeuralNetwork(xtrain.shape[1],[5],["SIGMOID"],ytrain.shape[1])

# Active model: one sigmoid hidden layer of 100 nodes, default sigmoid output
NN_obj=NeuralNetwork(
    inputNodes=xtrain.shape[1],
    hiddenList=[100],
    hiddenActivations=["SIGMOID"],
    outputNodes=ytrain.shape[1],
)

In [44]:
# initialiseWeight() draws from numpy's global RNG; seed it first so that a
# Restart & Run All reproduces the same starting weights (and results).
np.random.seed(0)
NN_obj.initialiseWeight()

In [45]:
# One (nodes_out, nodes_in + 1) shape per layer transition
for weights in NN_obj.weight:
    print(np.shape(weights))

(100, 785)
(1, 101)


In [46]:
# Summed squared error over the whole training set with the current weights
NN_obj.getLoss(xtrain,ytrain)

5012.035007657304

In [51]:
# Full-batch alternative (whole training set per update):
# NN_obj.train(X=xtrain,Y=ytrain,learningRate=1,batchMode=False,batchSize=100,epochsToRun=4000,logEvery=100)
# Mini-batch training: batches of 100 rows, at most 100 more epochs per execution.
# NOTE: train() continues from the current weights and keeps incrementing
# NN_obj.epoch, so re-running this cell trains further. The saved output
# (epochs above 200) indicates this cell was executed more than once.
NN_obj.train(X=xtrain,Y=ytrain,learningRate=1,batchMode=True,batchSize=100,epochsToRun=100,logEvery=2,decay=False)

Epoch: 202 Loss: 52.711729212019975 Accuracy: 0.9948
Epoch: 204 Loss: 52.613070429461615 Accuracy: 0.9948
Epoch: 206 Loss: 52.49920505997789 Accuracy: 0.9948
Epoch: 208 Loss: 52.36179974146111 Accuracy: 0.9948
Epoch: 210 Loss: 52.196288533758654 Accuracy: 0.9948
Epoch: 212 Loss: 52.01063437415349 Accuracy: 0.9948
Epoch: 214 Loss: 51.81693953468284 Accuracy: 0.9949
Epoch: 216 Loss: 51.60942985515742 Accuracy: 0.9949
Epoch: 218 Loss: 51.39046467463632 Accuracy: 0.995
Epoch: 220 Loss: 51.1956010607846 Accuracy: 0.995
Epoch: 222 Loss: 51.045163424645736 Accuracy: 0.995
Epoch: 224 Loss: 50.936474987807614 Accuracy: 0.995
Epoch: 226 Loss: 50.852017174012715 Accuracy: 0.995
Epoch: 228 Loss: 50.77969377843442 Accuracy: 0.995
Epoch: 230 Loss: 50.713439548452286 Accuracy: 0.995
Epoch: 232 Loss: 50.64969879213683 Accuracy: 0.995
Epoch: 234 Loss: 50.58575748858776 Accuracy: 0.995
Epoch: 236 Loss: 50.51900454926865 Accuracy: 0.995
Epoch: 238 Loss: 50.446502170893105 Accuracy: 0.995
Epoch: 240 Loss:

In [52]:
# Total epochs trained so far, then train accuracy, then test accuracy
print(
    NN_obj.epoch,
    NN_obj.getAccuracy(xtrain,ytrain),
    NN_obj.getAccuracy(xtest,ytest),
    sep="\n",
)

300
0.9956
0.9830555555555556


# Visualization

In [582]:
# visualization.plot_decision_boundary(model=NN_obj.predict_for_plot,X=xtrain,y=ytrain[:,0],filename="NN_DB_Train")

In [594]:
# visualization.plot_decision_boundary(model=NN_obj.predict_for_plot,X=xtest,y=ytest[:,0],filename="NN_DB_Test_RELU")