##Exercise 2b
###Multilayer Perceptron 
####One hidden layer with backpropagation

In [1189]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder
import random

In [1190]:
iris = pd.read_csv("iris.csv")
random.seed(6)
iris = iris.sample(frac=1,random_state=5).reset_index(drop=True) 

print ("Random number with seed 6")
random.seed(6)

Random number with seed 6


In [1191]:
X = iris[['Sepal.Length', 'Sepal.Width', 'Petal.Length', 'Petal.Width']]
X = np.array(X)

In [1192]:

one_hot_encoder = OneHotEncoder(sparse=False)
Y = iris.Species
Y = one_hot_encoder.fit_transform(np.array(Y).reshape(-1, 1))


In [1193]:
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.15)
X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train, test_size=0.1)

In [1194]:
def InitializeWeights(nodes):
    """Initialize weights with random values in [-1, 1] (including bias)"""
    np.random.seed(6)
    layers, weights = len(nodes), []
    
    for i in range(1, layers):
        np.random.seed(6)
        w = [[np.random.uniform(-1, 1) for k in range(nodes[i-1] + 1)]
              for j in range(nodes[i])]
        weights.append(np.matrix(w))
    print(weights)
    return weights

In [1195]:
def Sigmoid(x):
    return 1 / (1 + np.exp(-x))

def SigmoidDerivative(x):
    return np.multiply(x, 1-x)

In [1196]:
def ForwardPropagation(x, weights, layers):
    activations, layer_input = [x], x #initializing with the input
    #in every iteration of j layer_input is the sigmoid of the previous layer
    for j in range(layers):
        activation = Sigmoid(np.dot(layer_input, weights[j].T))
        activations.append(activation)
        layer_input = np.append(1, activation) # Augment with bias
    
    return activations

In [1197]:
def BackPropagation(y, activations, weights, layers):
    outputFinal = activations[-1]
    error = np.matrix(y - outputFinal) # Error at output
    #backprop layers from last to first
    for j in range(layers, 0, -1):
        currActivation = activations[j]
        
        if(j > 1):
            # Augment previous activation
            prevActivation = np.append(1, activations[j-1])
        else:
            # First hidden layer, prevActivation is input (without bias)
            prevActivation = activations[0]
        
        delta = np.multiply(error, SigmoidDerivative(currActivation))
        weights[j-1] += lr * np.multiply(delta.T, prevActivation)

        w = np.delete(weights[j-1], [0], axis=1) # Remove bias from weights
        error = np.dot(delta, w) # Calculate error for current layer
    
    return weights

In [1198]:
def Train(X, Y, lr, weights):
    layers = len(weights)
    for i in range(len(X)):
        x, y = X[i], Y[i]
        x = np.matrix(np.append(1, x)) # Augment feature vector
        
        activations = ForwardPropagation(x, weights, layers)
        weights = BackPropagation(y, activations, weights, layers)

    return weights

In [1199]:
def Predict(item, weights):
    layers = len(weights)
    item = np.append(1, item) # Augment feature vector
    
    ##_Forward Propagation_##
    activations = ForwardPropagation(item, weights, layers)
    
    outputFinal = activations[-1].A1
    index = FindMaxActivation(outputFinal)

    # Initialize prediction vector to zeros
    y = [0 for i in range(len(outputFinal))]
    y[index] = 1  # Set guessed class to 1

    return y # Return prediction vector


def FindMaxActivation(output):
    """Find max activation in output"""
    m, index = output[0], 0
    for i in range(1, len(output)):
        if(output[i] > m):
            m, index = output[i], i
    
    return index

In [1200]:
def Accuracy(X, Y, weights):
    """Run set through network, find overall accuracy"""
    correct = 0

    for i in range(len(X)):
        x, y = X[i], list(Y[i])
        guess = Predict(x, weights)

        if(y == guess):
            # Guessed correctly
            correct += 1

    return correct / len(X)

In [1201]:
def NeuralNetwork(X_train, Y_train, X_val=None, Y_val=None, epochs=10, nodes=[], lr=0.15):
    hidden_layers = len(nodes) - 1
    weights = InitializeWeights(nodes)

    for epoch in range(1, epochs+1):
        weights = Train(X_train, Y_train, lr, weights)

        if(epoch % 5 == 0):
            print("Epoch {}".format(epoch))
            print("Training Accuracy:{}".format(Accuracy(X_train, Y_train, weights)))
            if X_val.any():
                print("Validation Accuracy:{}".format(Accuracy(X_val, Y_val, weights)))
            if Accuracy(X_val, Y_val, weights)>0.93:
              break
    return weights

In [1202]:
f = len(X[0]) # Number of features
o = len(Y[0]) # Number of outputs / classes

layers = [f, 10, o] # Number of nodes in layers
lr, epochs = 0.1, 20

weights = NeuralNetwork(X_train, Y_train, X_val, Y_val, epochs=epochs, nodes=layers, lr=lr);

[matrix([[ 0.7857203 , -0.33604039,  0.64245825, -0.91660675, -0.78468664],
        [ 0.19010413,  0.05963472, -0.16238514, -0.3291843 ,  0.24503886],
        [-0.12371715,  0.47176421,  0.03607282,  0.1577172 ,  0.29071019],
        [ 0.98044854,  0.63971639, -0.17359813,  0.75253531,  0.64751887],
        [-0.89105098,  0.43727447,  0.60434112,  0.47281329,  0.4182635 ],
        [ 0.08187371, -0.75035165,  0.91529459, -0.1934874 , -0.56609768],
        [ 0.43455169,  0.98841488, -0.48877189,  0.34261886,  0.19801183],
        [ 0.43466429,  0.87469907, -0.29638046, -0.49273181, -0.19505498],
        [ 0.49302143,  0.44814113, -0.18778441,  0.9787597 , -0.09900144],
        [-0.25238313,  0.41925721, -0.83508289, -0.20325416,  0.54176193]]), matrix([[ 0.7857203 , -0.33604039,  0.64245825, -0.91660675, -0.78468664,
          0.19010413,  0.05963472, -0.16238514, -0.3291843 ,  0.24503886,
         -0.12371715],
        [ 0.47176421,  0.03607282,  0.1577172 ,  0.29071019,  0.98044854,
  

In [1203]:
print("Testing Accuracy: {}".format(Accuracy(X_test, Y_test, weights)))

Testing Accuracy: 0.7391304347826086


#Results
##20 epochs
###For 2 nodes in hidden layer we have 
Training Accuracy:0.9561<BR>
Validation Accuracy:0.8461<BR>
Testing Accuracy: 0.9130<br>
###For 5 nodes in hidden layer we have 
Training Accuracy:0.9824<br>
Validation Accuracy:0.9230<br>
Testing Accuracy: 0.9565<br>
###For 10 nodes in hidden layer we have 
Training Accuracy:0.7456<br>
Validation Accuracy:0.7692<br>
Testing Accuracy: 0.7391<br>
#Conclusion
####As we expected the model performs better than the linear-perceptron(ex_2a) since it combines the hidden layer's outputs to produce the final result.<br>Although the results are better our model still doesn't classify all the examples correctly no matter how many epochs we chose to run it for.This may be due to the dataset needing more hidden layers or outliers <br>Also, if we increase the number of nodes in the hidden layer too much its possible that our model will overfit and our accuracy will decrease.<br>

