In [1]:
# Imports

import random
import numpy as np
from sklearn.datasets import load_iris
import pandas as pd
import sklearn.model_selection
from sklearn.preprocessing import StandardScaler

In [2]:
# Declare constants

NUM_INPUT_NODES = 4
NUM_OUTPUT_NODES = 3
NUM_HIDDEN_NODES = 10
NUM_HIDDEN_LAYERS = 1

In [3]:
class Node:
    """Each node has a weight and bias"""
    def __init__(self, num_weights):
        """
        num_weights: the number of nodes in the previous layer
        """
        self.z = 0
        self.activation = 0
        self.weights = [random.uniform(0, 0.01) for _ in range(num_weights)]
        self.bias = random.uniform(0.0, 0.01)

# Consider using a separate class for input node, not necessary
class InputNode:
    def __init__(self):
        self.activation = 0

In [4]:
# activation functions

def relu(x):
    return np.maximum(0, x)

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def tanh(x):
    return np.tanh(x)

def leaky_relu(x, alpha=0.01):
    return max(alpha * x, x)

def softmax(logits):
    exp_logits = np.exp(logits - np.max(logits))  # for numerical stability
    return exp_logits / np.sum(exp_logits, axis=0)

def sigmoid_derivative(x):
    return x * (1 - x)

In [5]:
class MLP:
    "Create a multilayer perceptron"
    def __init__(self, n_in, n_out, n_hidden, hidden_layers):
        self.n_hidden_layers = hidden_layers;
        self.nodes = []
        self.create(n_in, n_out, n_hidden)

    def create(self, n_in, n_out, n_hidden):
        nodes = []
        
        input_layer = [InputNode() for _ in range(n_in)]
        nodes.append(input_layer)
        
        for i in range(self.n_hidden_layers):
            nodes.append([Node(num_weights=n_in) for _ in range(n_hidden)])
            
        output_layer = [Node(num_weights=n_hidden) for _ in range(n_out)]
        nodes.append(output_layer)
        
        self.nodes = nodes

    def forward_pass(self, X_row):
        # input layer
        for i, input_node in enumerate(self.nodes[0]):
            input_node.activation = X_row[i]

        # input layer to hidden layer
        for L in range(1, self.n_hidden_layers + 1):  # note +1 must be used bc the range is not inclusive of upper
            for node_j in self.nodes[L]:
                node_j.z = sum(node_k.activation * node_j.weights[k] for k, node_k in enumerate(self.nodes[L - 1])) + node_j.bias
                node_j.activation = sigmoid(node_j.z)

        # calculate z of output layer
        for output_node_j in self.nodes[-1]:
            output_node_j.z = sum(node_k.activation * output_node_j.weights[k] for k, node_k in enumerate(self.nodes[-2])) + output_node_j.bias

        # apply softmax to output layer activations as a vector
        softmax_output = softmax([output_node.z for output_node in self.nodes[-1]])
        
        #update activations of output layer
        for j, output_node_j in enumerate(self.nodes[-1]):
            output_node_j.activation = softmax_output[j]

    def backpropogation(self, eta, y):
        """
        eta: learning rate
        y: one hot encoded expected output vector of output layer
        """
        # update weights between output and second to last layer
        for j, output_node_j in enumerate(self.nodes[-1]):
            for k, weight_jk in enumerate(output_node_j.weights):
                a_k = self.nodes[-2][k].activation
                z_j = output_node_j.z
                a_j = output_node_j.activation
                weight_jk -= eta * a_k * sigmoid_derivative(z_j) * 2 * (a_j - y[j])
                output_node_j.weights[k] = weight_jk

        # update weights between hidden and previous layers
        for L in range(1, self.n_hidden_layers + 1):  # note +1 must be used bc the range is not inclusive of upper
            for j, hidden_node_j in enumerate(self.nodes[L]):
                for k, weight_jk in enumerate(hidden_node_j.weights):
                    partialC_partiala_j = sum([sigmoid_derivative(output_node.z) * 2 * (output_node.activation - y[i]) for i, output_node in enumerate(self.nodes[L+1])])    
                    a_k = self.nodes[L-1][k].activation
                    z_j = hidden_node_j.z
                    a_j = hidden_node_j.activation
                    weight_jk -= eta * a_k * sigmoid_derivative(z_j) * 2 * partialC_partiala_j
                    hidden_node_j.weights[k] = weight_jk
                    
    def show_weights(self):
        for layer in mlp.nodes[1:]:
            print([node.weights for node in layer])
            

In [6]:
# Create MLP

mlp = MLP(NUM_INPUT_NODES, NUM_OUTPUT_NODES, NUM_HIDDEN_NODES, NUM_HIDDEN_LAYERS)

In [7]:
iris = load_iris()

In [8]:
X = iris.data
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [9]:
y = iris.target

In [10]:
# Number of classes
num_classes = np.max(y) + 1

# One-hot encode the target variable y
y = np.eye(num_classes)[y]

In [11]:
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, test_size=0.33)

In [12]:
mlp.nodes[-1][0].weights

[0.0007709389002892042,
 0.001383766723993598,
 0.009665441265467688,
 0.0047162898556733345,
 0.008485313299491745,
 0.0072975200922354615,
 0.002275503917796635,
 0.0007424400728356318,
 0.006982040808647999,
 0.004166234563045435]

In [13]:
# training loop
learning_rate = 0.01
epochs = 500
for i in range(0, epochs):
    print("epoch " + str(i))
    # 1 epoch includes all data 
    for i, row in enumerate(X_train):
        mlp.forward_pass(X_train[i])
        input = [node.activation for node in mlp.nodes[0]]
        hidden = [node.activation for node in mlp.nodes[1]]
        output = [node.activation for node in mlp.nodes[-1]]
        print(output)
        print(y_train[i])
        mlp.backpropogation(learning_rate, y_train[i])

epoch 0
[0.3317243938074071, 0.33566732063694543, 0.3326082855556474]
[1. 0. 0.]
[0.33202431722393844, 0.3354990921297894, 0.33247659064627216]
[0. 0. 1.]
[0.33187611693032104, 0.33530870333903695, 0.332815179730642]
[0. 0. 1.]
[0.3317410242829098, 0.33509932183031, 0.33315965388678015]
[0. 1. 0.]
[0.33154651752424263, 0.3354920127845403, 0.332961469691217]
[0. 0. 1.]
[0.3314395466337339, 0.33525635114176805, 0.33330410222449813]
[1. 0. 0.]
[0.33172459968389517, 0.335102122293975, 0.33317327802212976]
[0. 0. 1.]
[0.3315714067984289, 0.3348970299304177, 0.33353156327115346]
[0. 0. 1.]
[0.3314565662738398, 0.33467119807097395, 0.3338722356551862]
[1. 0. 0.]
[0.33176935772096755, 0.33450636790801863, 0.33372427437101376]
[1. 0. 0.]
[0.3320945966990085, 0.33433868529952815, 0.33356671800146337]
[1. 0. 0.]
[0.3324004554287386, 0.33417625547893676, 0.33342328909232466]
[0. 0. 1.]
[0.3322413627851636, 0.3339809076899609, 0.3337777295248754]
[0. 1. 0.]
[0.33207894790901793, 0.3343442460087466,

In [14]:
mlp.nodes[-1][0].weights
# TODO try making initial weights larger

[0.09461990213879527,
 0.25429044740045814,
 -0.028194425181952587,
 0.0733988670589929,
 0.20326822251153503,
 0.20995989990801875,
 0.027976610198602522,
 -0.09696490661930134,
 0.17180752331045684,
 0.010445258613980215]

In [15]:
mlp.show_weights()

[[0.0884978184589052, 0.1605591789827962, -0.1779908051755755, -0.06284937091228303], [0.09164413220565629, 0.1594046346036791, -0.1876898305229189, -0.06208608808136468], [0.08522705337365757, 0.1617149459757649, -0.16806207553499886, -0.06386962091406387], [0.08869546042799484, 0.1605282318009507, -0.17866978851689674, -0.06308953159678976], [0.09074120793210358, 0.15973884132705343, -0.18488508887749136, -0.06226313058974544], [0.09092284537032663, 0.15965564676982827, -0.18541262179251963, -0.06209240063439018], [0.08777644878877318, 0.16080298187840844, -0.17576373979392496, -0.06295462078946001], [0.08759306092211415, 0.1609257376220822, -0.1753030044647804, -0.06340876343119402], [0.08968046371549326, 0.1601254724331602, -0.18160732614202935, -0.06249431506242247], [0.0869582460773305, 0.1611091605915668, -0.1733013075969411, -0.0633106251163593]]
[[0.09461990213879527, 0.25429044740045814, -0.028194425181952587, 0.0733988670589929, 0.20326822251153503, 0.20995989990801875, 0.02