In [28]:
import numpy as np


class Neural_Network:
    """Feed-forward network with one sigmoid hidden layer and one sigmoid output layer.

    Weights are learned online (one update per training instance) by
    backpropagation. Biases are used in the forward pass but are never changed
    by training.

    Attributes:
        hidden_layer_weights: float array of shape (num_inputs, num_hidden).
        output_layer_weights: float array of shape (num_hidden, num_outputs).
        hidden_biases: float array of shape (num_hidden,).
        output_biases: float array of shape (num_outputs,).
    """

    # Bias values applied when the caller supplies none.
    DEFAULT_HIDDEN_BIASES = (-0.02, -0.2)
    DEFAULT_OUTPUT_BIASES = (-0.33, 0.26, 0.06)

    # Initialize the network
    def __init__(self, num_inputs, num_hidden, num_outputs, hidden_layer_weights, output_layer_weights, learning_rate,
                 hidden_biases=None, output_biases=None):
        """Store the layer sizes, learning rate, and private copies of the parameters.

        Args:
            num_inputs: number of input features.
            num_hidden: number of hidden nodes.
            num_outputs: number of output nodes.
            hidden_layer_weights: array-like of shape (num_inputs, num_hidden).
            output_layer_weights: array-like of shape (num_hidden, num_outputs).
            learning_rate: step size applied to every weight change.
            hidden_biases: optional array-like of length num_hidden. Defaults to
                the leading entries of DEFAULT_HIDDEN_BIASES.
            output_biases: optional array-like of length num_outputs. Defaults to
                the leading entries of DEFAULT_OUTPUT_BIASES.

        Raises:
            ValueError: if a weight matrix or bias vector has the wrong shape.
        """
        self.num_inputs = num_inputs
        self.num_hidden = num_hidden
        self.num_outputs = num_outputs

        # Work on private float copies: training must not modify the caller's
        # arrays, and integer or nested-list weights must still be trainable.
        self.hidden_layer_weights = self._as_weight_matrix(
            hidden_layer_weights, (num_inputs, num_hidden), 'hidden_layer_weights')
        self.output_layer_weights = self._as_weight_matrix(
            output_layer_weights, (num_hidden, num_outputs), 'output_layer_weights')

        self.hidden_biases = self._as_bias_vector(
            hidden_biases, self.DEFAULT_HIDDEN_BIASES, num_hidden, 'hidden_biases')
        self.output_biases = self._as_bias_vector(
            output_biases, self.DEFAULT_OUTPUT_BIASES, num_outputs, 'output_biases')

        self.learning_rate = learning_rate

    @staticmethod
    def _as_weight_matrix(weights, expected_shape, name):
        """Return a float copy of `weights`, checking that it has `expected_shape`."""
        matrix = np.array(weights, dtype=float)  # np.array copies by default
        if matrix.shape != tuple(expected_shape):
            raise ValueError('{} must have shape {}, got {}'.format(name, tuple(expected_shape), matrix.shape))
        return matrix

    @staticmethod
    def _as_bias_vector(biases, defaults, size, name):
        """Return a float bias vector of length `size`, falling back to `defaults`."""
        if biases is None:
            # Use as many default values as there are nodes in the layer.
            vector = np.array(defaults[:size], dtype=float)
        else:
            vector = np.array(biases, dtype=float)
        if vector.shape != (size,):
            raise ValueError('{} must contain exactly {} values (got shape {}); '
                             'pass it explicitly for this layer size'.format(name, size, vector.shape))
        return vector

    def _input_vector(self, inputs):
        """Return the first `num_inputs` entries of `inputs` as a float array."""
        # Entries beyond num_inputs are ignored.
        return np.asarray(inputs[:self.num_inputs], dtype=float)

    # Calculate neuron activation for an input
    def sigmoid(self, inputs):
        """Return the logistic function 1 / (1 + e^-x) of a number or NumPy array."""
        output = 1/(1 + np.exp(-inputs))
        return output

    def forward_pass(self, inputs):
        """Propagate one instance through the network.

        Args:
            inputs: sequence holding the instance's feature values.

        Returns:
            (hidden_layer_outputs, output_layer_outputs): two lists with the
            sigmoid activation of every hidden node and every output node.
        """
        features = self._input_vector(inputs)

        # Each layer computes sigmoid(previous_activations . weights + bias).
        hidden_activations = self.sigmoid(features @ self.hidden_layer_weights + self.hidden_biases)
        output_activations = self.sigmoid(hidden_activations @ self.output_layer_weights + self.output_biases)

        # Lists are returned (not arrays) to keep the original return type.
        return list(hidden_activations), list(output_activations)

    # Backpropagate error and compute the weight changes
    def backward_propagate_error(self, inputs, hidden_layer_outputs, output_layer_outputs, desired_outputs):
        """Compute the weight changes for one instance; the weights are not modified.

        Args:
            inputs: feature values of the instance.
            hidden_layer_outputs: hidden activations returned by forward_pass.
            output_layer_outputs: output activations returned by forward_pass.
            desired_outputs: target value for every output node.

        Returns:
            (delta_output_layer_weights, delta_hidden_layer_weights): arrays of
            shape (num_hidden, num_outputs) and (num_inputs, num_hidden) that
            update_weights adds to the current weights.
        """
        features = self._input_vector(inputs)
        hidden = np.asarray(hidden_layer_outputs, dtype=float)
        outputs = np.asarray(output_layer_outputs, dtype=float)

        # Output betas: desired minus actual output.
        output_layer_betas = np.asarray(desired_outputs, dtype=float) - outputs
        # Beta scaled by the sigmoid slope o * (1 - o) of each output node.
        output_gradients = outputs * (1 - outputs) * output_layer_betas

        # Hidden betas: output gradients weighted by each hidden node's outgoing weights.
        hidden_layer_betas = self.output_layer_weights @ output_gradients
        hidden_gradients = hidden * (1 - hidden) * hidden_layer_betas

        # This is a HxO array (H hidden nodes, O outputs)
        delta_output_layer_weights = self.learning_rate * np.outer(hidden, output_gradients)
        # This is a IxH array (I inputs, H hidden nodes)
        delta_hidden_layer_weights = self.learning_rate * np.outer(features, hidden_gradients)

        # Return the changes we calculated, so they can be used to update all the weights.
        return delta_output_layer_weights, delta_hidden_layer_weights

    def update_weights(self, delta_output_layer_weights, delta_hidden_layer_weights):
        """Add the given weight changes to the current weights."""
        # Rebind instead of using += so an array shared with other code is never modified in place.
        self.hidden_layer_weights = self.hidden_layer_weights + delta_hidden_layer_weights
        self.output_layer_weights = self.output_layer_weights + delta_output_layer_weights

    def train(self, instances, desired_outputs, epochs):
        """Train with online learning and print the accuracy reached in every epoch.

        Args:
            instances: sequence of feature vectors.
            desired_outputs: one target vector per instance, indexed like `instances`.
            epochs: number of passes over `instances`.
        """
        for epoch in range(epochs):
            print('epoch = ', epoch)
            predictions = []
            for i, instance in enumerate(instances):
                hidden_layer_outputs, output_layer_outputs = self.forward_pass(instance)
                delta_output_layer_weights, delta_hidden_layer_weights = self.backward_propagate_error(
                    instance, hidden_layer_outputs, output_layer_outputs, desired_outputs[i])
                # The prediction is recorded before this instance's update is applied.
                predictions.append(np.argmax(output_layer_outputs))

                # We use online learning, i.e. update the weights after every instance.
                self.update_weights(delta_output_layer_weights, delta_hidden_layer_weights)

            # Accuracy over the predictions made during this epoch.
            correct = 0
            for i, predicted_class in enumerate(predictions):
                if predicted_class == np.argmax(desired_outputs[i]):
                    correct += 1
            # Divide by the number of predictions actually made; with no
            # instances the accuracy is undefined, so report nan.
            accuracy = correct / len(predictions) if predictions else float('nan')
            print('Accuracy in train set = ', accuracy)

    def predict(self, instances):
        """Return, for every instance, the index of the output node with the largest activation."""
        predictions = []
        for instance in instances:
            _, output_layer_outputs = self.forward_pass(instance)
            predictions.append(np.argmax(output_layer_outputs))
        return predictions

In [29]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import MinMaxScaler

def encode_labels(labels):
    """Encode string class labels as integers and as one-hot vectors.

    Args:
        labels: 1-D sequence of class labels (e.g. penguin species names).

    Returns:
        (label_encoder, integer_encoded, onehot_encoder, onehot_encoded):
        the fitted LabelEncoder, the integer codes as a column of shape
        (n_samples, 1), the fitted OneHotEncoder, and the dense one-hot matrix
        returned by its fit_transform.
    """
    # LabelEncoder numbers the classes from 0,
    # e.g. 'Adelie' -> 0, 'Chinstrap' -> 1, 'Gentoo' -> 2.
    label_encoder = LabelEncoder()
    integer_encoded = label_encoder.fit_transform(labels)
    # OneHotEncoder expects a 2-D (n_samples, n_features) array, so turn the codes into one column.
    integer_encoded = integer_encoded.reshape(len(integer_encoded), 1)

    # encode 0 as [1, 0, 0], 1 as [0, 1, 0], and 2 as [0, 0, 1] (to fit with our network outputs!)
    try:
        # scikit-learn >= 1.2 names the dense-output switch `sparse_output`.
        onehot_encoder = OneHotEncoder(sparse_output=False)
    except TypeError:
        # Older releases only accept the original `sparse` keyword.
        onehot_encoder = OneHotEncoder(sparse=False)
    onehot_encoded = onehot_encoder.fit_transform(integer_encoded)

    return label_encoder, integer_encoded, onehot_encoder, onehot_encoded


if __name__ == '__main__':
    # Load the training set.
    data = pd.read_csv('penguins307-train.csv')
    # the class label is last!
    labels = data.iloc[:, -1]
    # separate the data from the labels
    instances = data.iloc[:, :-1]
    # scale features to [0,1] to improve training
    scaler = MinMaxScaler()
    instances = scaler.fit_transform(instances)
    # We can't use strings as labels directly in the network, so need to do some transformations
    label_encoder, integer_encoded, onehot_encoder, onehot_encoded = encode_labels(labels)

    # Parameters. As per the handout.
    n_in = 4
    n_hidden = 2
    n_out = 3
    learning_rate = 0.2

    initial_hidden_layer_weights = np.array([[-0.28, -0.22], [0.08, 0.20], [-0.30, 0.32], [0.10, 0.01]])
    initial_output_layer_weights = np.array([[-0.29, 0.03, 0.21], [0.08, 0.13, -0.36]])

    nn = Neural_Network(n_in, n_hidden, n_out, initial_hidden_layer_weights, initial_output_layer_weights,
                        learning_rate)

    # .iloc selects by position, so this works whatever index the label column carries.
    print('First instance has label {}, which is {} as an integer, and {} as a list of outputs.\n'.format(
        labels.iloc[0], integer_encoded[0], onehot_encoded[0]))

    # need to wrap it into a 2D array
    instance1_prediction = nn.predict([instances[0]])
    if instance1_prediction[0] is None:
        # This should never happen once you have implemented the feedforward.
        instance1_predicted_label = "???"
    else:
        instance1_predicted_label = label_encoder.inverse_transform(instance1_prediction)
    print('Predicted label for the first instance is: {}\n'.format(instance1_predicted_label))

    # Perform a single backpropagation pass using the first instance only
    # (in other words, train with 1 instance for 1 epoch).
    nn.train([instances[0]], [onehot_encoded[0]], 1)
    print('Weights after performing BP for first instance only:')
    print('Hidden layer weights:\n', nn.hidden_layer_weights)
    print('Output layer weights:\n', nn.output_layer_weights)

    # Train for 100 epochs, on all instances (continuing from the weights of the single-instance pass).
    nn.train(instances, onehot_encoded, 100)

    print('\nAfter training:')
    print('Hidden layer weights:\n', nn.hidden_layer_weights)
    print('Output layer weights:\n', nn.output_layer_weights)

    # Load the test set.
    pd_data_ts = pd.read_csv('penguins307-test.csv')
    test_labels = pd_data_ts.iloc[:, -1]
    test_instances = pd_data_ts.iloc[:, :-1]
    # Scale the test set with the ranges learned from the training data:
    # transform() only; calling fit_transform() here would re-fit the scaler on the test set.
    test_instances = scaler.transform(test_instances)
    pred = nn.predict(test_instances)

    # Reuse the encoder fitted on the training labels so that every class keeps
    # the index the network was trained with. transform() raises ValueError if
    # the test set contains a label that was not seen during training.
    test_integer_encoded = label_encoder.transform(test_labels)

    # Compute and print the test accuracy.
    acc = 0
    for i in range(len(pred)):
        if pred[i] == test_integer_encoded[i]:
            acc += 1
    print('Accuracy in test set: = ', acc/len(test_integer_encoded))

First instance has label Adelie, which is [0] as an integer, and [1. 0. 0.] as a list of outputs.

Predicted label for the first instance is: ['Chinstrap']

epoch =  0
Accuracy in train set =  0.0
Weights after performing BP for first instance only:
Hidden layer weights:
 [[-0.28056275 -0.21970523]
 [ 0.07826717  0.20090767]
 [-0.30124328  0.32065123]
 [ 0.09932855  0.01035171]]
Output layer weights:
 [[-0.27633516  0.01659626  0.1982984 ]
 [ 0.09427539  0.11599737 -0.37222444]]
epoch =  0
Accuracy in train set =  0.4552238805970149
epoch =  1
Accuracy in train set =  0.48507462686567165
epoch =  2
Accuracy in train set =  0.5
epoch =  3
Accuracy in train set =  0.5783582089552238
epoch =  4
Accuracy in train set =  0.6791044776119403
epoch =  5
Accuracy in train set =  0.746268656716418
epoch =  6
Accuracy in train set =  0.7873134328358209
epoch =  7
Accuracy in train set =  0.7947761194029851
epoch =  8
Accuracy in train set =  0.7947761194029851
epoch =  9
Accuracy in train set =  