# Assignment-5

### Import necessary libraries

In [None]:
import pandas as pd
import numpy as np


In [None]:
def splitTrainTest(data,percent):
    total=len(data)
    trainTotal=int(total*percent*0.01)
    testTotal=total-trainTotal
    return (data[0:trainTotal],data[trainTotal:total])

### Layer Class
This class's object represent the layers in neural network. It stores the number of neurons in each layers, activations, activation function associated with each layer and their weight vector (initialize on gaussian distribute with mean =0 and std deviation=1).

In [None]:
class Layer:
    def __init__(self,nNodesCurrent, nNodesNext, activationF):
        self.nodesNo=nNodesCurrent
        self.activations = np.zeros([nNodesCurrent,1])
        self.activationF=activationF
        if nNodesNext==0:
            self.weights=None
        else:
            self.weights=np.random.normal(0, 1, size=(nNodesCurrent,nNodesNext))

### Neural Network Class
Class of neural network to perform multiclass classification using the cross entropy as cost function and softmax as probabilty distribution activation function.

This class perform following tasks:
* Initialized the number of layers in neural network and create **Layer Class** object.
* Train the neural network on batches of inputs there by performing the forward and backward propogation using below helper methods.

#### Methods:
1. **Forward Propogation** : Perform the forward propogation, calculate and stores activations at each of the 
layer.
$$ z_1= w_1^TX $$
$$ a_1= f _1(z_1) $$
$$ z_2= w_2^Ta_1 $$
$$ \hat{y}= a_2= f_2(z_2) $$
2. **Calculate Error** : Here we calculate the cross entropy error of our neural network on the updated activations. This updation in activation take place after the updation of weights in gradient decent algorithm in backpropogation.
<br>
For multiclass classification we use the below cross entropy cost function:
$$ J =  -\sum\limits_{i} y \log \; \hat{y} $$
3. **Backward Propogation** : Here, we differentiate the cost function to minimize it, and find the optimal values of parameters ie weights at each of the layer.
$$\frac{\partial J}{\partial w_2 } =  \frac{\partial J}{\partial \hat{y} }  * \frac{\partial \hat{y}}{\partial z_2 }  * \frac{\partial z_2}{\partial w_2 }   $$
<br>
also for cross entropy as a cost function and softmax as $f_2(z_2)$ , 
$$ \delta_3  =  \frac{\partial J}{\partial \hat{y} }  * \frac{\partial \hat{y}}{\partial z_2 } = (y_p - y_a)  $$
<br>
$$ \therefore   \frac{\partial J}{\partial w_2 }= (\hat{y} - y) * a_1$$
<br><br>
Similarly to calculate parital derivative w.r.t weights of inner layers, we can use the chain rule
$$   \frac{\partial J}{\partial w_1 } = x^T *  \delta_3 *  w_2 *  f^1 (z_1)      $$

4. **Get Accuracy**: This function will return the accuracy of our neural network on multiclass classification.

<hr>

**Some important activations functions and their derivatives used in our network**
**Softmax**
<br>
$$\sigma (z)_j = \frac{e^{z_j}}{\sum^K_{k=1} e^{z_j}}$$
**Sigmoid**
<br>
$$sigmoid(x) = \frac{1}{1+\epsilon ^ {-x}}$$
$$\frac{\partial sigmoid(x)}{\partial x} = sigmoid(x) * ( 1- sigmoid(x))$$

**Relu**
<br>
$$relu(x) = \max{(0,x)}$$
<br>
$$\frac{\partial relu(x)}{\partial x} = 1 \;\;\;\; if x>0  \\ 0 \;\;\; elsewhere$$

**tanh**
<br>
$$tanh(x) = \tanh{(x)}$$
<br>
$$\frac{\partial tanh(x)}{\partial x} = 1 - \tanh^2{(x)}$$

In [283]:
class NeuralNet:
    def __init__(self, totalLayers, noNodesList, activationFunctions):
        self.totalLayers=totalLayers
        self.noNodesList=noNodesList
        self.layers = []
        for i in range(totalLayers):
            currentLayerNodes=noNodesList[i]
            if i!=totalLayers-1:
                nextLayerNodes=noNodesList[i+1]
                ith_Layer=Layer(currentLayerNodes,nextLayerNodes,activationFunctions[i])
            else:
                ith_Layer=Layer(currentLayerNodes,0,activationFunctions[i])
            self.layers.append(ith_Layer)#append output layer as none

    def trainNetwork(self, data,outputLabels, batchSize, epochs, learningRate):
        self.learningRate=learningRate
        self.batchSize=batchSize;
        
        #normalize data
#         data=((data-data.min(axis=0))/(data.max(axis=0)-data.min(axis=0)))
        data=data/255
        
        for x in range(epochs):
            i=0  
            while i<len(data):
                self.error=0
                self.forwardPropo(data[i:i+batchSize])#input
                self.calculateError(outputLabels[i:i+batchSize])#output
                self.backwardPropo(outputLabels[i:i+batchSize])
                i+=batchSize
            self.error /= batchSize
            print("Epoch ",x,"->Error: ", self.error)
        
          
    def forwardPropo(self, inputs):
        self.layers[0].activations =inputs
        for i in range(self.totalLayers-1):
            temp=np.matmul(self.layers[i].activations,self.layers[i].weights)  
            if self.layers[i+1].activationF == "sigmoid":
                self.layers[i+1].activations = self.sigmoid(temp)
            elif self.layers[i+1].activationF == "softmax":
                self.layers[i+1].activations = self.softmax(temp)
            elif self.layers[i+1].activationF == "relu":
                self.layers[i+1].activations = self.relu(temp)
            elif self.layers[i+1].activationF == "tanh":
                self.layers[i+1].activations = self.tanh(temp)
            else:
                self.layers[i+1].activations = temp
        
    def calculateError(self,labels):
        if len(labels[0]) != self.layers[self.totalLayers-1].nodesNo:
            print ("Error: Label is not of the same shape as output layer.")
            print("Label: ", len(labels), " : ", len(labels[0]))
            print("Out: ", len(self.layers[self.totalLayers-1].activations), " : ", len(self.layers[self.totalLayers-1].activations[0]))
            return
        self.error += np.negative(np.sum(np.multiply(labels, np.log(self.layers[self.totalLayers-1].activations))))
    
    def backwardPropo(self, labels):
        targets = labels
        i = self.totalLayers-1
        y = self.layers[i].activations
        
        delta=(y-targets)
        deltaw = np.dot(self.layers[i-1].activations.T, delta)/self.batchSize
        new_weights = self.layers[i-1].weights - self.learningRate * deltaw
        for i in range(i-1, 0, -1):
            if self.layers[i].activationF=="sigmoid":
                prime= self.sigmoid_derivative(np.matmul(self.layers[i-1].activations,self.layers[i-1].weights))
            elif self.layers[i].activationF=="relu":
                prime= self.relu_derivative(np.matmul(self.layers[i-1].activations,self.layers[i-1].weights))
            elif self.layers[i].activationF=="tanh":
                prime= self.tanh_derivative(np.matmul(self.layers[i-1].activations,self.layers[i-1].weights))
            
            delta=np.multiply(prime,delta.dot(self.layers[i].weights.T))
            deltaw = np.dot(self.layers[i-1].activations.T, delta)/self.batchSize

            self.layers[i].weights = new_weights
            new_weights = self.layers[i-1].weights - self.learningRate * deltaw
        self.layers[0].weights = new_weights
            
    def getAccuracy(self, inputs, labels):
        inputs=inputs/255
        self.batchSize = len(inputs)
        self.forwardPropo(inputs)
        a = self.layers[self.totalLayers-1].activations
        print(len(a))
        total=0
        correct=0
        for i in range(len(a)):
            total += 1
            al = a[i].tolist()
            if labels[i][al.index(max(al))] == 1:
                correct += 1
        print(correct)
        print("Accuracy: ", correct*100/total)
    
    def sigmoid(self, x):
        return np.divide(1, np.add(1, np.exp(np.negative(x))))
    
    def sigmoid_derivative(self,x):
        return (self.sigmoid(x)*(1-self.sigmoid(x)))
    
    def relu(self, x):
        return (x/700) * (x > 0)
    
    def relu_derivative(self,X):
        return 1. * (X > 0)
    
    def softmax(self, x):
        exp = np.exp(x)
        if isinstance(x[0], np.ndarray):
            return exp/np.sum(exp, axis=1, keepdims=True)
        else:
            return exp/np.sum(exp, keepdims=True)

    def tanh(self, x):
        return np.tanh(x)
    
    def tanh_derivative(self,x):
        return 1.0 - np.tanh(x) ** 2

In [241]:
def getOneHotLabels(data,k):
    one_hot_labels = np.zeros((len(data), k))
    for i in range(len(data)):  
        one_hot_labels[i,data[i,0]] = 1
    return one_hot_labels

In [None]:
data=pd.read_csv("../input/apparel-trainval.csv").values

### Question-1 Part-2

**Sigmoid**

In [284]:
train,test=splitTrainTest(data,80)
oneHotLabelsTrain=getOneHotLabels(train,10)
oneHotLabelsTest=getOneHotLabels(test,10)
trainInputs=train[:,1:]
testInputs=test[:,1:]
print("Neural network with sigmoid activation function in hidden layers")

numberofLayers=4
noofneurons=[784,16,16,10]
activationFunctions=[None,"sigmoid","sigmoid","softmax"]
batchSize=64
epochs=50
learningRat=0.1

#NeuralNet(noLayers, noNeurons in each layer, activationFunctions)
nn=NeuralNet(numberofLayers,noofneurons,activationFunctions)

nn.trainNetwork(trainInputs,oneHotLabelsTrain,batchSize,epochs,learningRat)
nn.getAccuracy( testInputs, oneHotLabelsTest)

Neural network with sigmoid activation function in hidden layers
Epoch  0 ->Error:  1.1444362710733365
Epoch  1 ->Error:  0.8908223800291348
Epoch  2 ->Error:  0.8233232467751349
Epoch  3 ->Error:  0.7930685779650962
Epoch  4 ->Error:  0.7710560795228129
Epoch  5 ->Error:  0.7498884656331919
Epoch  6 ->Error:  0.7273381097528101
Epoch  7 ->Error:  0.7042487708341048
Epoch  8 ->Error:  0.6813432143368299
Epoch  9 ->Error:  0.6612763580214069
Epoch  10 ->Error:  0.6446275418117886
Epoch  11 ->Error:  0.6309052730757667
Epoch  12 ->Error:  0.6199287702205996
Epoch  13 ->Error:  0.6107823543078843
Epoch  14 ->Error:  0.6015650332029695
Epoch  15 ->Error:  0.5923679680194792
Epoch  16 ->Error:  0.5836847893284267
Epoch  17 ->Error:  0.5751551321748132
Epoch  18 ->Error:  0.566363783765619
Epoch  19 ->Error:  0.5570908098102729
Epoch  20 ->Error:  0.5470884223915831
Epoch  21 ->Error:  0.5363040433133341
Epoch  22 ->Error:  0.5253838983021238
Epoch  23 ->Error:  0.5153271903378908
Epoch  24 

**Tanh**

In [285]:
train,test=splitTrainTest(data,80)
oneHotLabelsTrain=getOneHotLabels(train,10)
oneHotLabelsTest=getOneHotLabels(test,10)
trainInputs=train[:,1:]
testInputs=test[:,1:]
print("Neural network with tanh activation function in hidden layers")

numberofLayers=4
noofneurons=[784,16,16,10]
activationFunctions=[None,"tanh","tanh","softmax"]
batchSize=64
epochs=50
learningRat=0.1

#NeuralNet(noLayers, noNeurons in each layer, activationFunctions)
nn=NeuralNet(numberofLayers,noofneurons,activationFunctions)

nn.trainNetwork(trainInputs,oneHotLabelsTrain,batchSize,epochs,learningRat)
nn.getAccuracy( testInputs, oneHotLabelsTest)

Neural network with tanh activation function in hidden layers
Epoch  0 ->Error:  0.9147561093379838
Epoch  1 ->Error:  0.7601456485701445
Epoch  2 ->Error:  0.6674523294338822
Epoch  3 ->Error:  0.6600319876154879
Epoch  4 ->Error:  0.619981507480916
Epoch  5 ->Error:  0.6191465188244452
Epoch  6 ->Error:  0.6242408257106333
Epoch  7 ->Error:  0.6441535465193587
Epoch  8 ->Error:  0.657844591786037
Epoch  9 ->Error:  0.6426668242482998
Epoch  10 ->Error:  0.6405906354616409
Epoch  11 ->Error:  0.6340736362465693
Epoch  12 ->Error:  0.6366770445885462
Epoch  13 ->Error:  0.6397543444264681
Epoch  14 ->Error:  0.6355432380601799
Epoch  15 ->Error:  0.6393952941441227
Epoch  16 ->Error:  0.6370275001477713
Epoch  17 ->Error:  0.6318105346596896
Epoch  18 ->Error:  0.610671360339365
Epoch  19 ->Error:  0.5937647955716299
Epoch  20 ->Error:  0.5716779396451035
Epoch  21 ->Error:  0.5626718533952708
Epoch  22 ->Error:  0.5582246468967483
Epoch  23 ->Error:  0.5530235268787245
Epoch  24 ->Err

**ReLU**

In [286]:
train,test=splitTrainTest(data,80)
oneHotLabelsTrain=getOneHotLabels(train,10)
oneHotLabelsTest=getOneHotLabels(test,10)
trainInputs=train[:,1:]
testInputs=test[:,1:]
print("Neural network with reLU activation function in hidden layers")

numberofLayers=4
noofneurons=[784,16,16,10]
activationFunctions=[None,"relu","relu","softmax"]
batchSize=64
epochs=50
learningRat=0.1

#NeuralNet(noLayers, noNeurons in each layer, activationFunctions)
nn=NeuralNet(numberofLayers,noofneurons,activationFunctions)

nn.trainNetwork(trainInputs,oneHotLabelsTrain,batchSize,epochs,learningRat)
nn.getAccuracy( testInputs, oneHotLabelsTest)


Neural network with reLU activation function in hidden layers
Epoch  0 ->Error:  1.1200639439802258
Epoch  1 ->Error:  0.7768701455401835
Epoch  2 ->Error:  0.6826118613949433
Epoch  3 ->Error:  0.601097727277558
Epoch  4 ->Error:  0.5401174423527499
Epoch  5 ->Error:  0.4939050145023782
Epoch  6 ->Error:  0.44880891295912295
Epoch  7 ->Error:  0.41630738826816527
Epoch  8 ->Error:  0.38543007848742716
Epoch  9 ->Error:  0.3602471454015399
Epoch  10 ->Error:  0.33890628990100635
Epoch  11 ->Error:  0.31979703725485953
Epoch  12 ->Error:  0.30486966629376494
Epoch  13 ->Error:  0.2939486849104042
Epoch  14 ->Error:  0.2865041112838718
Epoch  15 ->Error:  0.2784787397629279
Epoch  16 ->Error:  0.2722440148543084
Epoch  17 ->Error:  0.2653483233853403
Epoch  18 ->Error:  0.2599242885009695
Epoch  19 ->Error:  0.25535208624075295
Epoch  20 ->Error:  0.25097954213408186
Epoch  21 ->Error:  0.2456668034194958
Epoch  22 ->Error:  0.242839050825094
Epoch  23 ->Error:  0.24148325900938936
Epoch