##Exercise 2c
###Multilayer Perceptron 
####Two hidden layers 

In [394]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder
import random

In [395]:
iris = pd.read_csv("iris.csv")
random.seed(6)
iris = iris.sample(frac=1,random_state=5).reset_index(drop=True) 

print ("Random number with seed 6")
random.seed(6)

Random number with seed 6


In [396]:
X = iris[['Sepal.Length', 'Sepal.Width', 'Petal.Length', 'Petal.Width']]
X = np.array(X)

In [397]:

one_hot_encoder = OneHotEncoder(sparse=False)
Y = iris.Species
Y = one_hot_encoder.fit_transform(np.array(Y).reshape(-1, 1))


In [398]:
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.15)
X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train, test_size=0.1)

In [399]:
def InitializeWeights(nodes):
    """Initialize weights with random values in [-1, 1] (including bias)"""
    np.random.seed(6)
    layers, weights = len(nodes), []
    
    for i in range(1, layers):
        np.random.seed(6)
        w = [[np.random.uniform(-1, 1) for k in range(nodes[i-1] + 1)]
              for j in range(nodes[i])]
        weights.append(np.matrix(w))
    print(weights)
    return weights

In [400]:
def Sigmoid(x):
    return 1 / (1 + np.exp(-x))

def SigmoidDerivative(x):
    return np.multiply(x, 1-x)

In [401]:
def ForwardPropagation(x, weights, layers):
    activations, layer_input = [x], x #initializing with the input
    #in every iteration of j layer_input is the sigmoid of the previous layer
    for j in range(layers):
        activation = Sigmoid(np.dot(layer_input, weights[j].T))
        activations.append(activation)
        layer_input = np.append(1, activation) # Augment with bias
    
    return activations

In [402]:
def BackPropagation(y, activations, weights, layers):
    outputFinal = activations[-1]
    error = np.matrix(y - outputFinal) # Error at output
    #backprop layers from last to first
    for j in range(layers, 0, -1):
        currActivation = activations[j]
        
        if(j > 1):
            # Augment previous activation
            prevActivation = np.append(1, activations[j-1])
        else:
            # First hidden layer, prevActivation is input (without bias)
            prevActivation = activations[0]
        
        delta = np.multiply(error, SigmoidDerivative(currActivation))
        weights[j-1] += lr * np.multiply(delta.T, prevActivation)

        w = np.delete(weights[j-1], [0], axis=1) # Remove bias from weights
        error = np.dot(delta, w) # Calculate error for current layer
    
    return weights

In [403]:
def Train(X, Y, lr, weights):
    layers = len(weights)
    for i in range(len(X)):
        x, y = X[i], Y[i]
        x = np.matrix(np.append(1, x)) # Augment feature vector
        
        activations = ForwardPropagation(x, weights, layers)
        weights = BackPropagation(y, activations, weights, layers)

    return weights

In [404]:
def Predict(item, weights):
    layers = len(weights)
    item = np.append(1, item) # Augment feature vector
    
    ##_Forward Propagation_##
    activations = ForwardPropagation(item, weights, layers)
    
    outputFinal = activations[-1].A1
    index = FindMaxActivation(outputFinal)

    # Initialize prediction vector to zeros
    y = [0 for i in range(len(outputFinal))]
    y[index] = 1  # Set guessed class to 1

    return y # Return prediction vector


def FindMaxActivation(output):
    """Find max activation in output"""
    m, index = output[0], 0
    for i in range(1, len(output)):
        if(output[i] > m):
            m, index = output[i], i
    
    return index

In [405]:
def Accuracy(X, Y, weights):
    """Run set through network, find overall accuracy"""
    correct = 0

    for i in range(len(X)):
        x, y = X[i], list(Y[i])
        guess = Predict(x, weights)

        if(y == guess):
            # Guessed correctly
            correct += 1

    return correct / len(X)

In [406]:
def NeuralNetwork(X_train, Y_train, X_val=None, Y_val=None, epochs=10, nodes=[], lr=0.15):
    hidden_layers = len(nodes) - 1
    weights = InitializeWeights(nodes)

    for epoch in range(1, epochs+1):
        weights = Train(X_train, Y_train, lr, weights)

        if(epoch % 5 == 0):
            print("Epoch {}".format(epoch))
            print("Training Accuracy:{}".format(Accuracy(X_train, Y_train, weights)))
            if X_val.any():
                print("Validation Accuracy:{}".format(Accuracy(X_val, Y_val, weights)))
            if Accuracy(X_val, Y_val, weights)>0.93:
              break
    return weights

In [407]:
f = len(X[0]) # Number of features
o = len(Y[0]) # Number of outputs / classes

layers = [f, 8,5, o] # Number of nodes in layers
lr, epochs = 0.1, 100

weights = NeuralNetwork(X_train, Y_train, X_val, Y_val, epochs=epochs, nodes=layers, lr=lr);

[matrix([[ 0.7857203 , -0.33604039,  0.64245825, -0.91660675, -0.78468664],
        [ 0.19010413,  0.05963472, -0.16238514, -0.3291843 ,  0.24503886],
        [-0.12371715,  0.47176421,  0.03607282,  0.1577172 ,  0.29071019],
        [ 0.98044854,  0.63971639, -0.17359813,  0.75253531,  0.64751887],
        [-0.89105098,  0.43727447,  0.60434112,  0.47281329,  0.4182635 ],
        [ 0.08187371, -0.75035165,  0.91529459, -0.1934874 , -0.56609768],
        [ 0.43455169,  0.98841488, -0.48877189,  0.34261886,  0.19801183],
        [ 0.43466429,  0.87469907, -0.29638046, -0.49273181, -0.19505498]]), matrix([[ 0.7857203 , -0.33604039,  0.64245825, -0.91660675, -0.78468664,
          0.19010413,  0.05963472, -0.16238514, -0.3291843 ],
        [ 0.24503886, -0.12371715,  0.47176421,  0.03607282,  0.1577172 ,
          0.29071019,  0.98044854,  0.63971639, -0.17359813],
        [ 0.75253531,  0.64751887, -0.89105098,  0.43727447,  0.60434112,
          0.47281329,  0.4182635 ,  0.08187371, -0.

In [408]:
print("Testing Accuracy: {}".format(Accuracy(X_test, Y_test, weights)))

Testing Accuracy: 0.9565217391304348


#Results
##20 epochs
###For 4 nodes in 1rst hidden layer and 8 in the 2nd we have :
Training Accuracy:0.6578<BR>
Validation Accuracy:0.7692<BR>
Testing Accuracy: 0.6521<br>
###For 4 nodes in 1rst hidden layer and 4 in the 2nd we have :
Training Accuracy:0.7017<br>
Validation Accuracy:0.5384<br>
Testing Accuracy: 0.5658<br>
###For 8 nodes in 1rst hidden layer and 3 in the 2nd we have :
Training Accuracy:0.6754<br>
Validation Accuracy:0.6923<br>
Testing Accuracy: 0.6086<br>
###For 8 nodes in 1rst hidden layer and 5 in the 2nd we have :
Training Accuracy:0.8771<br>
Validation Accuracy:0.9230<br>
Testing Accuracy: 1<br>
###For 8 nodes in 1rst hidden layer and 6 in the 2nd we have :
Training Accuracy:0.8642<br>
Validation Accuracy:0.8461<br>
Testing Accuracy: 0.9561<br>
#Conclusion
####As we expected the model performs better than the linear-perceptron(ex_2a) since it combines the hidden layer's outputs to produce the final result.<br>The model with the 8 nodes in the 1rst hidden layer and 6 in the 2nd performs better than any other model(for the same number of epochs) which tells us that the model is not overfiting.If we increase the nodes more the accuracy decreases and the model overfits.<br><br>Each neuron in the first hidden layer of the MLP works as an independent perceptron, i.e. each hidden neuron can make linear separations. The second hidden layer can aggregate these independent linear separations into nonlinear separations.Additional hidden layers can aggregate their predecessor hidden layer's separations to complex regions in space.A further property of a hidden layer, is a kind of dimensionality reduction. The high-dimensional input data is reduced into a much lower dimensional data, by letting the neural net finding linear dependencies within the daty automatically.<br><br>However, if we create an MLP with more neurons than needed then the model can easily overfit.In general most classification problems can be solved with 1 hidden layer.


