In [1]:
# Imports

import random
import numpy as np
from sklearn.datasets import load_iris
import pandas as pd
import sklearn.model_selection
from sklearn.preprocessing import StandardScaler

In [2]:
# Declare constants

NUM_INPUT_NODES = 4
NUM_OUTPUT_NODES = 3
NUM_HIDDEN_NODES = 10
NUM_HIDDEN_LAYERS = 1

In [3]:
class Node:
    """Each node has a weight and bias"""
    def __init__(self, num_weights):
        """
        num_weights: the number of nodes in the previous layer
        """
        self.z = 0
        self.activation = 0
        self.weights = [random.uniform(0, 0.01) for _ in range(num_weights)]
        self.bias = random.uniform(0.0, 0.01)

# Consider using a separate class for input node, not necessary
class InputNode:
    def __init__(self):
        self.activation = 0

In [4]:
# activation functions

def relu(x):
    return np.maximum(0, x)

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def tanh(x):
    return np.tanh(x)

def leaky_relu(x, alpha=0.01):
    return max(alpha * x, x)

def softmax(logits):
    exp_logits = np.exp(logits - np.max(logits))  # for numerical stability
    return exp_logits / np.sum(exp_logits, axis=0)

def sigmoid_derivative(x):
    return x * (1 - x)

In [5]:
class MLP:
    "Create a multilayer perceptron"
    def __init__(self, n_in, n_out, n_hidden, hidden_layers):
        self.n_hidden_layers = hidden_layers;
        self.nodes = []
        self.create(n_in, n_out, n_hidden)

    def create(self, n_in, n_out, n_hidden):
        nodes = []
        
        input_layer = [InputNode() for _ in range(n_in)]
        nodes.append(input_layer)
        
        for i in range(self.n_hidden_layers):
            nodes.append([Node(num_weights=n_in) for _ in range(n_hidden)])
            
        output_layer = [Node(num_weights=n_hidden) for _ in range(n_out)]
        nodes.append(output_layer)
        
        self.nodes = nodes

    def forward_pass(self, X_row):
        # input layer
        for i, input_node in enumerate(self.nodes[0]):
            input_node.activation = X_row[i]

        # input layer to hidden layer
        for L in range(1, self.n_hidden_layers + 1):  # note +1 must be used bc the range is not inclusive of upper
            for node_j in self.nodes[L]:
                node_j.z = sum(node_k.activation * node_j.weights[k] for k, node_k in enumerate(self.nodes[L - 1])) + node_j.bias
                node_j.activation = sigmoid(node_j.z)

        # calculate z of output layer
        for output_node_j in self.nodes[-1]:
            output_node_j.z = sum(node_k.activation * output_node_j.weights[k] for k, node_k in enumerate(self.nodes[-2])) + output_node_j.bias

        # apply softmax to output layer activations as a vector
        softmax_output = softmax([output_node.z for output_node in self.nodes[-1]])
        
        #update activations of output layer
        for j, output_node_j in enumerate(self.nodes[-1]):
            output_node_j.activation = softmax_output[j]

    def backpropogation(self, eta, y):
        """
        eta: learning rate
        y: one hot encoded expected output of output layer
        """
        # output layer
        for j, output_node_j in enumerate(self.nodes[-1]):
            for k, weight_jk in enumerate(output_node_j.weights):
                a_k = self.nodes[-2][k].activation
                z_j = output_node_j.z
                a_j = output_node_j.activation
                weight_jk -= eta * a_k * sigmoid_derivative(z_j) * 2 * (a_j - y[j])
                output_node_j.weights[k] = weight_jk
            

In [6]:
# Create MLP

mlp = MLP(NUM_INPUT_NODES, NUM_OUTPUT_NODES, NUM_HIDDEN_NODES, NUM_HIDDEN_LAYERS)

In [7]:
iris = load_iris()

In [8]:
X = iris.data
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [9]:
y = iris.target

In [10]:
# Number of classes
num_classes = np.max(y) + 1

# One-hot encode the target variable y
y = np.eye(num_classes)[y]

In [11]:
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, test_size=0.33)

In [12]:
mlp.nodes[-1][0].weights

[0.00010873904030843429,
 0.003326364812866578,
 0.007464251200943461,
 0.0004893264892635563,
 0.0022365393916389674,
 0.003562064246619415,
 0.009850213543394826,
 0.006351368237591657,
 0.006247626377293639,
 0.006470276718304192]

In [13]:
# training loop
learning_rate = 0.001
epochs = 1000
for i in range(0, epochs):
    print("epoch " + str(i))
    # 1 epoch includes all data 
    for i, row in enumerate(X_train):
        mlp.forward_pass(X_train[i])
        input = [node.activation for node in mlp.nodes[0]]
        hidden = [node.activation for node in mlp.nodes[1]]
        output = [node.activation for node in mlp.nodes[-1]]
        print(output)
        mlp.backpropogation(learning_rate, y_train[i])

epoch 0
[0.3328484033572855, 0.33166888947478834, 0.33548270716792605]
[0.3328655538329689, 0.33165139642371205, 0.3354830497433191]
[0.33286152026296106, 0.3316858788095622, 0.33545260092747675]
[0.3328356554530298, 0.33171756223805265, 0.33544678230891756]
[0.33281381754741884, 0.33169791154641076, 0.33548827090617045]
[0.33278788760772304, 0.33167980880001785, 0.33553230359225905]
[0.3327657534352957, 0.3317124472261328, 0.33552179933857146]
[0.3327614808639344, 0.33169674282610734, 0.3355417763099583]
[0.3328021003266511, 0.3316830730344633, 0.3355148266388856]
[0.3328340363027497, 0.33166701533613147, 0.33549894836111877]
[0.33281147489190005, 0.33169877881170357, 0.33548974629639633]
[0.33280057262047275, 0.3316812631073663, 0.33551816427216097]
[0.3327778760329852, 0.33171359113776344, 0.3355085328292514]
[0.3327622886417052, 0.33174712238477927, 0.3354905889735156]
[0.33276090794555063, 0.3317822069256476, 0.3354568851288018]
[0.3327990072129521, 0.3317684421115553, 0.335432550

In [14]:
mlp.nodes[-1][0].weights

[-0.005251154017596051,
 -0.0020179200218781146,
 0.0024548214580045148,
 -0.0045710072276596864,
 -0.0025886174113939654,
 -0.0021098076381299343,
 0.0043023550614887,
 0.0015662144239644426,
 0.0012198623619295711,
 0.0017143716671083812]