In [2]:
import pandas as pd
import numpy as np
import visualization
import math

# Neural Network Class

In [3]:
class NeuralNetwork:
    """Fully connected feed-forward network trained with (mini-)batch gradient descent.

    Layer 0 is the input layer, followed by the layers listed in ``hiddenList`` and
    finally the output layer. Every weight matrix has shape
    ``(nodes_in_layer, nodes_in_previous_layer+1)``; column 0 multiplies the constant
    bias input that ``append1`` prepends to each layer's output.

    The loss is the summed squared error (``lossfunction``) and its derivative is
    hard-coded in ``gradient_J_o``; the two must be changed together.

    Recognised activation names are "RELU" and "SIGMOID". Any other name
    (for example "None") is treated as the identity function.
    """

    def __init__(self,inputNodes,hiddenList,hiddenActivations,outputNodes=1,outputActivation="SIGMOID"):
        """Record the architecture. Weights are only created by ``initialiseWeight``.

        Args:
            inputNodes: number of input attributes.
            hiddenList: list with the number of nodes of every hidden layer.
            hiddenActivations: list with one activation name per hidden layer.
            outputNodes: number of output nodes.
            outputActivation: activation name of the output layer.

        Raises:
            ValueError: if ``hiddenList`` and ``hiddenActivations`` differ in length.
        """
        # A longer activation list would silently shift a hidden activation onto the
        # output layer; a shorter one would only fail later inside forwardPass.
        if(len(hiddenList)!=len(hiddenActivations)):
            raise ValueError("hiddenList and hiddenActivations must have the same length")

        self.inputNodes=inputNodes
        self.outputNodes=outputNodes
        # Nodes in each layer, including the input and output layers
        self.hiddenList=[inputNodes]+hiddenList+[outputNodes]
        # Activation of each layer, including the input ("None") and output layers
        self.Activations=["None"]+hiddenActivations+[outputActivation]
        self.weight=[]
        self.os=[]      # per-layer outputs stored by the last forward pass
        self.net=[]     # per-layer pre-activations stored by the last forward pass
        self.deltas=[]  # per-layer error terms stored by the last backward pass
        self.loss=0
        self.trainsteps=0
        self.epoch=0

    def initialiseWeight(self):
        """Draw new random weights (standard normal scaled by 2) and reset the counters."""
        self.trainsteps=0
        self.epoch=0
        self.weight=[
            np.random.randn(self.hiddenList[i],self.hiddenList[i-1]+1)*2
            for i in range(1,len(self.hiddenList))
        ]

    def applyActivation(self,vector,activation="None"):
        """Apply the named activation element-wise and return the result."""
        if(activation=="RELU"):
            return vector*(vector>=0)
        elif(activation=="SIGMOID"):
            # Only exp(-|x|) is ever evaluated, so a large-magnitude input can never
            # overflow; the two branches are algebraically equal to 1/(1+exp(-x)).
            z=np.exp(-np.abs(vector))
            return np.where(vector>=0,1.0/(1.0+z),z/(1.0+z))
        else:
            return 1*vector

    def append1(self,x):
        """Return the 2-D array ``x`` with a leading column of ones (the bias input)."""
        [instances,_]=x.shape
        bias=np.ones([instances,1])
        return np.concatenate((bias,x),axis=1)

    def forwardPass(self,batchX):
        """Propagate ``batchX`` through the network, filling ``self.os`` and ``self.net``.

        Every stored layer except the output layer carries the extra bias column.

        Raises:
            RuntimeError: if the network has no weights yet.
        """
        # Without weights the bias-augmented input would be reported as the prediction.
        if(len(self.weight)==0):
            raise RuntimeError("Weights are not initialised; call initialiseWeight() first")

        layers=len(self.weight)
        biasedInput=self.append1(batchX)
        self.os=[biasedInput]
        self.net=[biasedInput.copy()]

        for i in range(layers):
            netj=np.matmul(self.os[i],self.weight[i].T)
            o=self.applyActivation(netj,self.Activations[i+1])
            if(i==layers-1):
                # The output layer feeds no further layer, so it gets no bias column.
                self.net.append(netj)
                self.os.append(o)
            else:
                self.net.append(self.append1(netj))
                self.os.append(self.append1(o))

    # Euclidean Loss -> if changed, gradient_J_o must be updated as well
    def lossfunction(self,pred,y):
        """Element-wise squared error between ``pred`` and ``y``."""
        return (pred-y)**2

    def getLoss(self,batchX,batchY):
        """Run a forward pass on ``batchX`` and return the summed squared error."""
        self.forwardPass(batchX)
        finalOutput=self.os[len(self.os)-1]
        return np.sum(self.lossfunction(finalOutput,batchY))

    def gradient_o_net(self,o,net,activation="None"):
        """Derivative of a layer's output with respect to its pre-activation."""
        if(activation=="RELU"):
            return (net>=0)
        elif(activation=="SIGMOID"):
            return o*(1-o)
        else:
            # Identity activation: the derivative is one everywhere.
            return np.ones_like(o)

    def gradient_o_net_layer(self,layerNo):
        """``gradient_o_net`` evaluated on the values stored for layer ``layerNo``."""
        return self.gradient_o_net(self.os[layerNo],self.net[layerNo],self.Activations[layerNo])

    def gradient_J_o(self,o,y):
        """Derivative of the squared-error loss with respect to the network output."""
        return 2*(o-y)

    def backwardPass(self,batchX,batchY,learningRate=0.01):
        """Back-propagate the error of the last forward pass and update the weights in place.

        ``forwardPass`` must already have been run on the same ``batchX``, because the
        stored ``self.os`` / ``self.net`` values are used. The step applied to each
        weight matrix is ``learningRate`` times the gradient averaged over the batch.
        """
        layers=len(self.os)
        m=float(batchX.shape[0])

        finalDelta=self.gradient_J_o(self.os[layers-1],batchY)*self.gradient_o_net_layer(layers-1)
        self.deltas=[finalDelta]

        # Walk from the last hidden layer back to the first; self.deltas[0] always
        # holds the error term of the layer that follows the one being processed.
        weightIndex=len(self.weight)
        for i in range(layers-2,0,-1):
            weightIndex-=1
            propagated=np.matmul(self.deltas[0],self.weight[weightIndex])
            propagated=propagated*self.gradient_o_net_layer(i)
            # Column 0 belongs to the bias unit, which has no incoming weights.
            self.deltas.insert(0,propagated[:,1:])

        # All deltas were computed with the old weights, so updating here is safe.
        for i in range(len(self.weight)-1,-1,-1):
            del_w=np.matmul(self.deltas[i].T,self.os[i])
            self.weight[i]-=(learningRate/m)*del_w

    def train(self,X,Y,learningRate=0.1,batchMode=False,batchSize=100,epsilon=0.0001,epochsToRun=500,logEvery=50,decay=False):
        """Train the network with gradient descent.

        Args:
            X: 2-D array of inputs, one row per instance.
            Y: 2-D array of targets, one row per instance.
            learningRate: base step size.
            batchMode: when False the whole data set is used as a single batch and
                ``batchSize`` is ignored; otherwise rows are consumed in order in
                chunks of ``batchSize``.
            batchSize: rows per mini-batch when ``batchMode`` is enabled.
            epsilon: training stops once the summed loss over ``X`` changes by less
                than this between two consecutive epochs.
            epochsToRun: maximum number of epochs performed by this call.
            logEvery: print loss and accuracy whenever the network's cumulative epoch
                counter is a multiple of this value; 0 or None disables logging.
            decay: when True the step size is ``learningRate/sqrt(trainsteps)``.

        Raises:
            ValueError: if ``batchMode`` is enabled and ``batchSize`` is not positive.
        """
        [instances,attributes]=X.shape
        [_,outputs]=Y.shape

        if(batchMode==False):
            batchSize=instances
        elif(batchSize<=0):
            # A non-positive batch size would never advance the cursor below.
            raise ValueError("batchSize must be positive when batchMode is enabled")

        lr=learningRate
        Trained=False
        prevLoss=self.getLoss(X,Y)
        epochs=0

        while(not Trained and epochs<epochsToRun):
            self.epoch+=1
            epochs+=1

            cur=0
            while(cur<instances):
                uplim=min(cur+batchSize,instances)
                batchX=X[cur:uplim,:]
                batchY=Y[cur:uplim,:]
                cur=uplim
                self.forwardPass(batchX)
                self.trainsteps+=1
                if(decay):
                    lr=learningRate/math.sqrt(float(self.trainsteps))
                self.backwardPass(batchX,batchY,lr)

            curLoss=self.getLoss(X,Y)
            if(abs(curLoss-prevLoss)<epsilon):
                Trained=True
            prevLoss=curLoss

            # The truthiness guard keeps logEvery=0 from raising ZeroDivisionError.
            if(logEvery and self.epoch%logEvery==0):
                print ("Epoch:",self.epoch,"Loss:",prevLoss,"Accuracy:",self.getAccuracy(X,Y))

    def predict(self,X):
        """Return the raw output-layer activations for ``X``."""
        self.forwardPass(X)
        return self.os[len(self.os)-1]

    def predict_for_plot(self,X):
        """Return the outputs for ``X`` thresholded at 0.5 as an integer 0/1 array."""
        return (self.predict(X)>=0.5).astype(int)

    def getAccuracy(self,X,Y):
        """Fraction of thresholded outputs (at 0.5) that equal the labels in ``Y``.

        The count of matches is divided by the number of predicted entries, so the
        value stays within [0, 1] when the network has more than one output node;
        for a single output node this equals the fraction of correct rows.
        """
        pred=(self.predict(X)>=0.5).astype(int)
        return np.sum(pred==Y)/float(pred.size)
        
            
            
        

# Loading Data

In [10]:
# Toy data set: four header-less, comma-separated files read in the order
# train inputs, train labels, test inputs, test labels.
xtrain,ytrain,xtest,ytest=[
    pd.read_csv(csv_path,header=None,sep=',').values
    for csv_path in (
        "Dataset/NN/toy_data/toy_trainX.csv",
        "Dataset/NN/toy_data/toy_trainY.csv",
        "Dataset/NN/toy_data/toy_testX.csv",
        "Dataset/NN/toy_data/toy_testY.csv",
    )
]

In [11]:
# xtrain=pd.read_csv("Dataset/NN/mnist_data/MNIST_train.csv",header=None,sep=',').values
# ytrain=xtrain[:,784:785]
# xtrain=xtrain[:,0:784]
# xtest=pd.read_csv("Dataset/NN/mnist_data/MNIST_test.csv",header=None,sep=',').values
# ytest=xtest[:,784:785]
# xtest=xtest[:,0:784]
# ytrain=(ytrain==6).astype(int)
# ytest=(ytest==6).astype(int)
# xtrain=xtrain/255.0
# xtest=xtest/255.0

In [12]:
# print(xtest)

In [13]:
# One shape per line for every split; the last entry is ytrain again,
# taken through a full column slice.
print(*(split.shape for split in (xtrain,ytrain,xtest,ytest,ytrain[:,0:])),sep="\n")

(380, 2)
(380, 1)
(120, 2)
(120, 1)
(380, 1)


# Testing NN

In [36]:
# NN_obj=NeuralNetwork(xtrain.shape[1],[5,5],["SIGMOID","SIGMOID"],ytrain.shape[1])
# NN_obj=NeuralNetwork(xtrain.shape[1],[5,5],["RELU","RELU"],ytrain.shape[1])
# NN_obj=NeuralNetwork(xtrain.shape[1],[5],["SIGMOID"],ytrain.shape[1])
# Two hidden layers of five sigmoid units each; the input and output widths
# are taken from the column counts of the training arrays.
NN_obj=NeuralNetwork(
    inputNodes=xtrain.shape[1],
    hiddenList=[5,5],
    hiddenActivations=["SIGMOID","SIGMOID"],
    outputNodes=ytrain.shape[1],
)

In [45]:
# Draw fresh random weights; this also resets the network's epoch and train-step counters.
NN_obj.initialiseWeight()

In [46]:
# Each weight matrix has shape (nodes in the layer, nodes in the previous layer + 1 for the bias).
for weights in NN_obj.weight:
    print(weights.shape)

(5, 3)
(5, 6)
(1, 6)


In [47]:
# Summed squared error of the current network over the whole training set.
NN_obj.getLoss(batchX=xtrain,batchY=ytrain)

151.97819622493637

In [52]:
# Full-batch alternative kept for reference:
# NN_obj.train(X=xtrain,Y=ytrain,learningRate=1,batchMode=False,batchSize=100,epochsToRun=4000,logEvery=100)
#
# Mini-batch gradient descent: 100 rows per step, constant learning rate of 1.
# train() does not reset the epoch counter stored on NN_obj, so re-running this
# cell continues the numbering in the log from where the previous run stopped.
NN_obj.train(
    X=xtrain,
    Y=ytrain,
    learningRate=1,
    decay=False,
    batchMode=True,
    batchSize=100,
    epochsToRun=20000,
    logEvery=100,
)

Epoch: 20100 Loss: 22.647319177426148 Accuracy: 0.9236842105263158
Epoch: 20200 Loss: 22.63623578699005 Accuracy: 0.9236842105263158
Epoch: 20300 Loss: 22.625251743143863 Accuracy: 0.9236842105263158
Epoch: 20400 Loss: 22.614366217420656 Accuracy: 0.9236842105263158
Epoch: 20500 Loss: 22.60357853539456 Accuracy: 0.9236842105263158
Epoch: 20600 Loss: 22.592888158583357 Accuracy: 0.9236842105263158
Epoch: 20700 Loss: 22.582294667345064 Accuracy: 0.9236842105263158
Epoch: 20800 Loss: 22.571797744730873 Accuracy: 0.9236842105263158
Epoch: 20900 Loss: 22.561397161266697 Accuracy: 0.9236842105263158
Epoch: 21000 Loss: 22.551092760641502 Accuracy: 0.9236842105263158
Epoch: 21100 Loss: 22.540884446285325 Accuracy: 0.9263157894736842
Epoch: 21200 Loss: 22.530772168822928 Accuracy: 0.9263157894736842


In [53]:
# Epochs completed so far, then accuracy on the training and the test split.
print(NN_obj.epoch,NN_obj.getAccuracy(xtrain,ytrain),NN_obj.getAccuracy(xtest,ytest),sep="\n")

21268
0.9263157894736842
0.8583333333333333


# Visualization

In [58]:
# Decision boundary of the current NN_obj drawn over the training points.
visualization.plot_decision_boundary(
    X=xtrain,
    y=ytrain[:,0],
    model=NN_obj.predict_for_plot,
    filename="NewPlot/NN_DB_Train_2Layer",
)

In [56]:
# Decision boundary of the current NN_obj drawn over the test points.
visualization.plot_decision_boundary(
    X=xtest,
    y=ytest[:,0],
    model=NN_obj.predict_for_plot,
    filename="NewPlot/NN_DB_Test_2Layer",
)

In [57]:
# Sweep the width of a single sigmoid hidden layer, training a fresh network per width
# and saving its decision boundary on both splits.
# The sweep keeps its networks in a dedicated variable instead of rebinding NN_obj, so
# the model trained in the earlier cells stays available to the "2Layer" plotting cells
# no matter in which order the cells are executed.
for i in [1,2,3,5,10,20,40]:
    sweep_nn=NeuralNetwork(xtrain.shape[1],[i],["SIGMOID"],ytrain.shape[1])
    sweep_nn.initialiseWeight()
    # batchMode=False trains on the whole training set per step; batchSize is ignored then.
    sweep_nn.train(X=xtrain,Y=ytrain,learningRate=1,batchMode=False,batchSize=100,epochsToRun=20000,logEvery=100,decay=False)
    print (sweep_nn.getAccuracy(xtrain,ytrain))
    print (sweep_nn.getAccuracy(xtest,ytest))
    visualization.plot_decision_boundary(model=sweep_nn.predict_for_plot,X=xtrain,y=ytrain[:,0],filename="NewPlot/NN_DB_Train_"+str(i)+"Best")
    visualization.plot_decision_boundary(model=sweep_nn.predict_for_plot,X=xtest,y=ytest[:,0],filename="NewPlot/NN_DB_Test_"+str(i)+"Best")