In [1]:
import numpy as np
from random import seed, random

In [4]:
# Initializing NN

def initialize_network(n_inputs, n_hidden, n_outputs):
    """ 
    Initializes a nn with only one hidden layer.
    Args: 
    - n_inputs: how many input values will be put into this neural network. This is the amount of neurons in the input layer essentially
    - n_hidden: how many neurons will be in the hidden layer of the nn.
    - n_outputs: how many different types of output values can the nn give.
    returns:
    - network (list) - list of 3 layers with each element in list being its own list of neurons in the layer. 
    The neurons then consist of a dictionary with a weights key correlating to a list of randomly initialized weights value.
    Remember each neuron in a layer has a correlating weight for every single neuron in the previous layer
    Also in the hidden and output layer one extra weight is initialized which is the bias term of the neuron's weight activation calculation.
    """
    network = list()
    hidden_layer = [{'weights':[random() for i in range(n_inputs + 1)]} for i in range(n_hidden)]
    network.append(hidden_layer)
    output_layer = [{'weights':[random() for i in range(n_hidden + 1)]} for i in range(n_outputs)]
    network.append(output_layer)
    return network


In [5]:
seed(1)
network = initialize_network(2,1,2)
for layer in network:
    print(layer)

[{'weights': [0.13436424411240122, 0.8474337369372327, 0.763774618976614]}]
[{'weights': [0.2550690257394217, 0.49543508709194095]}, {'weights': [0.4494910647887381, 0.651592972722763]}]


The activation function is just multiplying the previous neurons' activation values by their correlating weight value and summing the products. Then just add the bias term to the sum.

Math formula for activation:

$n$ = number of neurons in previous layer
$$a(\text{some output}) = \sigma(w_0a_0+w_1a_1+...+w_na_n+b)$$

In [19]:
def activate(weights, inputs):
    return np.dot(weights[:-1], inputs) + weights[-1]

## random input with activation function
inputs = [.034, .553]
activation_ex = activate(network[0][0]['weights'], inputs)
print(activation_ex)

1.2369738598027253


The transfer function is there to basically just normalize activation value around [0,1]
The most commonly used one is the sigmoid function which can be defined as:
$$
\sigma(a) = \frac{1}{1+e^{-a}}
$$


In [14]:
def transfer(activation):
    return 1.0/(1.0+np.exp(-activation))

trans_ex = transfer(activation_ex)
print(trans_ex)

0.7750368315543618


In [46]:
def forward_propagate(network, row):
    """ 
    This is the actual process of inputting a set of values into the neurarl network and it calculating the output values.
    Args:
    - network (list) : previously made network list.
    - row (list) : list of input activation values to enter into the nn.
    returns:
    - inputs (list) : list of output activation values for the nn.
    """
    inputs = row
    for layer in network:
        new_inputs = []
        for neuron in layer:
            activation = activate(neuron['weights'], inputs)
            neuron["output"] = transfer(activation)
            new_inputs.append(neuron['output'])
        inputs = new_inputs
    return inputs

In [21]:
row = [1, 0]
forward_propagate(network, row)

[0.6629970129852887, 0.7253160725279748]

In [23]:
def transfer_derivative(output):
    """
    basically all you are doing here is calculating del(a^(L)/del(z^(L))), i feel like it would be more accurate to say this is the
    derivative of the activation.
    """
    return output * (1.0-output)

In [25]:
def delta_calc(output, expected):
    """ 
    This part essentially calculates del(C0)/del(z^(L)) or the change in the cost function in relation to the activation function.
    """
    return 2*(output-expected) * transfer_derivative(output)

In [55]:
def propagate_backwards_error(network, expected):
    """ 
    Basic basic form of back propogation that doesn't change bias.
    """
    for L in reversed(range(len(network))):
        layer = network[L]
        errors = list()
        if L == len(network)-1: # if the layer is the last layer 
            for j,neuron in enumerate(layer):
                error = 2*(neuron['output']-expected[j]) # Calculates error for output term
                errors.append(error)
        else: # If layer is not last layer
            for j in range(len(layer)): # Go through every neuron
                error = 0.0 
                for neuron in network[L+1]:
                    error += (neuron['weights'][j]) * neuron['delta']
                errors.append(error)
        for j,neuron in enumerate(layer): # Goes through all outputs on last layer
                neuron['delta'] = errors[j] * transfer_derivative(neuron['output']) # Creates change vector 

In [40]:
# revert network incase of bugs
network = [[{'output': 0.7105668883115941, 'weights': [0.13436424411240122, 0.8474337369372327, 0.763774618976614]}],
 [{'output': 0.6213859615555266, 'weights': [0.2550690257394217, 0.49543508709194095]}, {'output': 0.6573693455986976, 'weights': [0.4494910647887381, 0.651592972722763]}]]

In [41]:
expected = [0, 1]
propagate_backwards_error(network, expected)

In [42]:
network

[[{'output': 0.7105668883115941,
   'weights': [0.13436424411240122, 0.8474337369372327, 0.763774618976614],
   'delta': 0.0010696096093221034}],
 [{'output': 0.6213859615555266,
   'weights': [0.2550690257394217, 0.49543508709194095],
   'delta': 0.29238129367165616},
  {'output': 0.6573693455986976,
   'weights': [0.4494910647887381, 0.651592972722763],
   'delta': -0.1543447548692654}]]

In [43]:
# Update network weights with error
def update_weights(network, row, l_rate):
	for L in range(len(network)): # starting at first layer
		inputs = row 
		if L != 0: # If the layer is not the first
			inputs = [neuron['output'] for neuron in network[L - 1]] # updates inputs to be activations of last layer
		for neuron in network[L]: # for every neuron in this layer
			for j in range(len(inputs)):
				neuron['weights'][j] -= l_rate * neuron['delta'] * inputs[j] # updates weights (you are subtracting because you want the error lower)
			neuron['weights'][-1] -= l_rate * neuron['delta'] # updates bias, change in bias in relation to sum its just one.

In [53]:
import timeit
def train_network(network, train, l_rate, n_epoch, n_outputs):
	for epoch in range(n_epoch):
		start = timeit.timeit()
		print("Starting epoch:", epoch)
		sum_error = 0
		for row in train: # for every training example given
			inputs = row[:-1]
			outputs = forward_propagate(network, inputs) # Get outputs of inputs
			expected = [0 for i in range(n_outputs)] # creates list of 0 for every output
			expected[row[-1]] = 1 # sets the answer of the input to be 1 in the expected vector
			sum_error += sum([(expected[i]-outputs[i])**2 for i in range(len(expected))]) 
			propagate_backwards_error(network, expected)
			update_weights(network, row, l_rate)
		end = timeit.timeit()
		print('>epoch=%d, lrate=%.3f, error=%.3f, time elapsed=%.3f' % (epoch, l_rate, sum_error, (end-start)))

In [48]:
dataset = [[2.7810836,2.550537003,0],
 [1.465489372,2.362125076,0],
 [3.396561688,4.400293529,0],
 [1.38807019,1.850220317,0],
 [3.06407232,3.005305973,0],
 [7.627531214,2.759262235,1],
 [5.332441248,2.088626775,1],
 [6.922596716,1.77106367,1],
 [8.675418651,-0.242068655,1],
 [7.673756466,3.508563011,1]]

In [49]:
n_inputs = len(dataset[0]) - 1
n_outputs = len(set([row[-1] for row in dataset]))
network = initialize_network(n_inputs, 2, n_outputs)

In [52]:
weights = [0.7887233511355132, 0.0938595867742349, 0.02834747652200631]
weights[:-1]

[0.7887233511355132, 0.0938595867742349]

In [51]:
network

[[{'weights': [0.7887233511355132, 0.0938595867742349, 0.02834747652200631]},
  {'weights': [0.8357651039198697, 0.43276706790505337, 0.762280082457942]}],
 [{'weights': [0.0021060533511106927, 0.4453871940548014, 0.7215400323407826]},
  {'weights': [0.22876222127045265, 0.9452706955539223, 0.9014274576114836]}]]

In [56]:
train_network(network, dataset, 0.5, 20, n_outputs)

Starting epoch: 0
>epoch=0, lrate=0.500, error=6.399, time elapsed=0.000
Starting epoch: 1
>epoch=1, lrate=0.500, error=5.641, time elapsed=-0.000
Starting epoch: 2
>epoch=2, lrate=0.500, error=5.598, time elapsed=-0.000
Starting epoch: 3
>epoch=3, lrate=0.500, error=5.560, time elapsed=-0.000
Starting epoch: 4
>epoch=4, lrate=0.500, error=5.477, time elapsed=-0.000
Starting epoch: 5
>epoch=5, lrate=0.500, error=5.256, time elapsed=-0.000
Starting epoch: 6
>epoch=6, lrate=0.500, error=4.848, time elapsed=0.000
Starting epoch: 7
>epoch=7, lrate=0.500, error=4.428, time elapsed=0.000
Starting epoch: 8
>epoch=8, lrate=0.500, error=3.955, time elapsed=-0.000
Starting epoch: 9
>epoch=9, lrate=0.500, error=3.476, time elapsed=-0.000
Starting epoch: 10
>epoch=10, lrate=0.500, error=3.002, time elapsed=0.000
Starting epoch: 11
>epoch=11, lrate=0.500, error=2.531, time elapsed=-0.000
Starting epoch: 12
>epoch=12, lrate=0.500, error=2.140, time elapsed=0.000
Starting epoch: 13
>epoch=13, lrate=0

In [57]:
network

[[{'weights': [1.6929903483941775, -2.6037805953745474, -0.25898733196259016],
   'output': 0.9613118549459109,
   'delta': -0.010500887202609983},
  {'weights': [0.8359447344301513, 0.45089593001563344, 0.7713891257718044],
   'output': 0.9998444633443719,
   'delta': 6.86806038476142e-06}],
 [{'weights': [-3.375976277605466, 0.4784925052614799, 1.0162136155613617],
   'output': 0.1556546786659818,
   'delta': 0.04091423688845803},
  {'weights': [3.1435861558255827, -0.4879185769545255, -0.8591740224799083],
   'output': 0.8329977560176469,
   'delta': -0.04646419749069415}]]

In [60]:
import pandas as pd 

seeds = pd.read_csv('wheat-seeds.csv', header=None)
seeds

Unnamed: 0,0,1,2,3,4,5,6,7
0,15.26,14.84,0.8710,5.763,3.312,2.221,5.220,1
1,14.88,14.57,0.8811,5.554,3.333,1.018,4.956,1
2,14.29,14.09,0.9050,5.291,3.337,2.699,4.825,1
3,13.84,13.94,0.8955,5.324,3.379,2.259,4.805,1
4,16.14,14.99,0.9034,5.658,3.562,1.355,5.175,1
...,...,...,...,...,...,...,...,...
205,12.19,13.20,0.8783,5.137,2.981,3.631,4.870,3
206,11.23,12.88,0.8511,5.140,2.795,4.325,5.003,3
207,13.20,13.66,0.8883,5.236,3.232,8.315,5.056,3
208,11.84,13.21,0.8521,5.175,2.836,3.598,5.044,3


In [65]:
for index, row in seeds.iterrows():
    print(row)

0    15.260
1    14.840
2     0.871
3     5.763
4     3.312
5     2.221
6     5.220
7     1.000
Name: 0, dtype: float64
0    14.8800
1    14.5700
2     0.8811
3     5.5540
4     3.3330
5     1.0180
6     4.9560
7     1.0000
Name: 1, dtype: float64
0    14.290
1    14.090
2     0.905
3     5.291
4     3.337
5     2.699
6     4.825
7     1.000
Name: 2, dtype: float64
0    13.8400
1    13.9400
2     0.8955
3     5.3240
4     3.3790
5     2.2590
6     4.8050
7     1.0000
Name: 3, dtype: float64
0    16.1400
1    14.9900
2     0.9034
3     5.6580
4     3.5620
5     1.3550
6     5.1750
7     1.0000
Name: 4, dtype: float64
0    14.3800
1    14.2100
2     0.8951
3     5.3860
4     3.3120
5     2.4620
6     4.9560
7     1.0000
Name: 5, dtype: float64
0    14.6900
1    14.4900
2     0.8799
3     5.5630
4     3.2590
5     3.5860
6     5.2190
7     1.0000
Name: 6, dtype: float64
0    14.1100
1    14.1000
2     0.8911
3     5.4200
4     3.3020
5     2.7000
6     5.0000
7     1.0000
Name: 7, dtype: 