In [18]:
# `display` and `HTML` are part of the public IPython.display API; importing
# them from IPython.core.display is deprecated.
from IPython.display import display, HTML
# Widen the classic-notebook container so wide outputs are easier to read.
display(HTML("<style>.container { width:80% !important; }</style>"))

import pandas as pd
pd.options.display.max_columns = 80
import numpy as np
from random import random,seed, randrange

import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

from math import exp
from csv import reader

- ### simple neural network with backpropagation

We will organize layers as arrays of dictionaries and treat the whole network as an array of layers.

- neuron: dictionary {}; layer: list of neuron dictionaries [ {} ]; network: list of layers [ [ {} ] ]  

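- neuron in hidden layer: each has n_inputs + 1 weights (one per input, plus a bias stored as the last weight)
- neuron in output layer: each has n_hidden + 1 weights (one per hidden neuron, plus a bias stored as the last weight)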

In [19]:
def initialize_network(n_inputs, n_hidden, n_outputs):
    """Create a network with one hidden layer and one output layer.

    The network is a list of two layers; each layer is a list of neurons and
    each neuron is a dict holding a 'weights' list filled with values drawn
    from ``random()``.

    Args:
        n_inputs: number of input features; hidden neurons get n_inputs + 1 weights.
        n_hidden: number of neurons in the hidden layer.
        n_outputs: number of neurons in the output layer; each gets n_hidden + 1 weights.

    Returns:
        ``[hidden_layer, output_layer]``.
    """
    def _random_layer(n_neurons, fan_in):
        # One extra weight per neuron on top of the fan-in.
        built = []
        for _ in range(n_neurons):
            weights = []
            for _ in range(fan_in + 1):
                weights.append(random())
            built.append({'weights': weights})
        return built

    # The hidden layer is drawn first, then the output layer.
    return [_random_layer(n_hidden, n_inputs), _random_layer(n_outputs, n_hidden)]

In [20]:
# Tiny demo network: 2 inputs, 1 hidden neuron, 2 output neurons.
n1 = initialize_network(2, 1, 2)
# First view: print each layer as a whole (a list of neuron dicts).
for layer in n1:
    print(f'layer:{layer}\n')
    
# Second view: walk the same structure neuron by neuron.
for layer in n1:
    print('--layer:')
    for neuron in layer:
        print(f'neuron: {neuron}')

layer:[{'weights': [0.762280082457942, 0.0021060533511106927, 0.4453871940548014]}]

layer:[{'weights': [0.7215400323407826, 0.22876222127045265]}, {'weights': [0.9452706955539223, 0.9014274576114836]}]

--layer:
neuron: {'weights': [0.762280082457942, 0.0021060533511106927, 0.4453871940548014]}
--layer:
neuron: {'weights': [0.7215400323407826, 0.22876222127045265]}
neuron: {'weights': [0.9452706955539223, 0.9014274576114836]}


In [21]:
# neuron activation
def activate(weights, inputs):
    """Return the weighted sum of ``inputs`` plus the bias.

    The last entry of ``weights`` is the bias; every other weight is paired
    with the input at the same position. Extra trailing entries in
    ``inputs`` (for example a class label) are ignored.
    """
    total = weights[-1]  # start from the bias term
    for position, weight in enumerate(weights[:-1]):
        total += weight * inputs[position]
    return total

# neuron transfer
def transfer(activation):
    """Sigmoid transfer function, evaluated in a numerically stable way.

    Args:
        activation: the neuron's weighted input sum.

    Returns:
        A float in [0, 1].
    """
    if activation >= 0:
        # exp(-activation) is at most 1 here, so it cannot overflow.
        return 1 / (1 + exp(-activation))
    # For negative activations exp(-activation) can exceed the float range
    # and raise OverflowError; use the equivalent form e^x / (1 + e^x).
    z = exp(activation)
    return z / (1 + z)

In [22]:
# Activate the demo network's single hidden neuron on the input [1, 2].
activation = activate(n1[0][0]['weights'], [1,2])
# Call transfer() rather than re-typing the sigmoid formula, so this demo
# always matches the function the network actually uses.
output = transfer(activation)

In [23]:
# Show the first neuron of the first (hidden) layer of the demo network.
n1[0][0]

{'weights': [0.762280082457942, 0.0021060533511106927, 0.4453871940548014]}

In [24]:
def forward_propagate(network, row):
    """Run ``row`` through every layer and return the last layer's outputs.

    As a side effect each neuron's dict gets its latest result stored under
    the 'output' key.
    """
    signal = row
    for layer in network:
        for neuron in layer:
            weighted_sum = activate(neuron['weights'], signal)
            neuron['output'] = transfer(weighted_sum)
        # The outputs of this layer are the inputs of the next one.
        signal = [neuron['output'] for neuron in layer]
    return signal

In [25]:
# Forward pass of the input [1, 2] through the demo network; the result has
# one value per output neuron.
output = forward_propagate(n1, [1,2])
print(output)

[0.6867144677141436, 0.8361535633354321]


In [26]:
# assume transfer is sigmoid function
def transfer_derivative(output):
    """Slope of the transfer function, expressed via the neuron's own output.

    Uses the sigmoid identity s'(z) = s(z) * (1 - s(z)), where ``output`` is s(z).
    """
    complement = 1 - output
    return complement * output

def backward_propagate_error(network, expected):
    """Compute and store a 'delta' on every neuron, from the last layer back.

    For the output layer the error signal is ``expected[j] - output`` (the
    original comments assume a quadratic cost). For earlier layers it is the
    sum, over the next layer's neurons, of that neuron's weight for this
    neuron times its delta. In both cases the signal is then scaled by
    ``transfer_derivative`` of the neuron's own output.

    Args:
        network: list of layers whose neurons already carry 'output'.
        expected: target values, indexed like the output layer.
    """
    last = len(network) - 1
    for depth in range(last, -1, -1):
        layer = network[depth]

        if depth == last:
            # Output layer: compare directly with the targets.
            signals = [expected[k] - neuron['output'] for k, neuron in enumerate(layer)]
        else:
            # Earlier layer: gather the deltas already stored on the next layer.
            following = network[depth + 1]
            signals = []
            for k in range(len(layer)):
                gathered = 0
                for nxt in following:
                    gathered += (nxt['weights'][k] * nxt['delta'])
                signals.append(gathered)

        # Scale by the derivative of the activation function.
        for neuron, signal in zip(layer, signals):
            neuron['delta'] = signal * transfer_derivative(neuron['output'])

In [27]:
# Hand-written network (1 hidden neuron, 2 output neurons) with 'output'
# values already filled in, so back-propagation can be checked without
# running a forward pass first.
network = [[{'output': 0.7105668883115941, 'weights': [0.13436424411240122, 0.8474337369372327, 0.763774618976614]}],
           [{'output': 0.6213859615555266, 'weights': [0.2550690257394217, 0.49543508709194095]}, 
            {'output': 0.6573693455986976, 'weights': [0.4494910647887381, 0.651592972722763]}]]
# Target values for the two output neurons.
expected = [0, 1]
backward_propagate_error(network, expected)
# Every neuron dict should now also contain a 'delta' entry.
for layer in network:
    print(layer)

[{'output': 0.7105668883115941, 'weights': [0.13436424411240122, 0.8474337369372327, 0.763774618976614], 'delta': -0.0005348048046610517}]
[{'output': 0.6213859615555266, 'weights': [0.2550690257394217, 0.49543508709194095], 'delta': -0.14619064683582808}, {'output': 0.6573693455986976, 'weights': [0.4494910647887381, 0.651592972722763], 'delta': 0.0771723774346327}]


In [31]:
def update_weights(network, row, l_rate):
    """Apply one update step using the deltas stored on the neurons.

    Each weight grows by ``l_rate * delta * input`` and the bias (last
    weight) by ``l_rate * delta``. The first layer's inputs are ``row``
    without its last element; every later layer's inputs are the 'output'
    values of the layer before it.
    """
    for depth, layer in enumerate(network):
        if depth:
            inputs = [prev['output'] for prev in network[depth - 1]]
        else:
            inputs = row[:-1]

        for neuron in layer:
            step = l_rate * neuron['delta']
            weights = neuron['weights']
            for position, value in enumerate(inputs):
                weights[position] += step * value
            # The bias has no input to scale by.
            weights[-1] += step

In [32]:
def train_network(network, train, l_rate, n_epoch, n_outputs, verbose=False):
    """Train ``network`` in place, updating the weights after every row.

    Args:
        network: list of layers as produced by ``initialize_network``.
        train: rows whose last element is an integer class label, used as an
            index into the one-hot target vector.
        l_rate: learning rate passed to ``update_weights``.
        n_epoch: number of passes over ``train``.
        n_outputs: length of the one-hot target vector.
        verbose: when True, print the summed squared error after each epoch.
            Defaults to False, which keeps the function silent as before.
    """
    for epoch in range(n_epoch):
        sum_error = 0
        for row in train:
            outputs = forward_propagate(network, row)

            # One-hot encode the class label stored in the last column.
            expected = [0 for _ in range(n_outputs)]
            expected[row[-1]] = 1
            sum_error += sum([(outputs[k] - expected[k])**2 for k in range(n_outputs)])

            backward_propagate_error(network, expected)
            update_weights(network, row, l_rate)

        if verbose:
            print(f'> epoch={epoch}, lrate={l_rate}, error={sum_error}')

In [33]:
# Seed Python's `random` module so the weights drawn later by
# initialize_network are the same on every run.
seed(1)
# Toy dataset: each row is [x1, x2, class], with class 0 or 1 in the last column.
dataset = [[2.7810836,2.550537003,0],
[1.465489372,2.362125076,0],
[3.396561688,4.400293529,0],
[1.38807019,1.850220317,0],
[3.06407232,3.005305973,0],
[7.627531214,2.759262235,1],
[5.332441248,2.088626775,1],
[6.922596716,1.77106367,1],
[8.675418651,-0.242068655,1],
[7.673756466,3.508563011,1]]

In [34]:
# Every column except the last one (the class label) is an input feature.
n_inputs = len(dataset[0]) - 1
# One output neuron per distinct class label.
n_outputs = len({row[-1] for row in dataset})
network = initialize_network(n_inputs, n_hidden=2, n_outputs=n_outputs)

In [35]:
# Train on the toy dataset with learning rate 0.5 for 20 epochs.
train_network(network, dataset, 0.5, 20, n_outputs)
# Show the trained layers; each neuron dict now holds 'weights', 'output' and 'delta'.
for layer in network:
    print(layer)

[{'weights': [-1.4688375095432327, 1.850887325439514, 1.0858178629550297], 'output': 0.029980305604426185, 'delta': -0.0059546604162323625}, {'weights': [0.37711098142462157, -0.0625909894552989, 0.2765123702642716], 'output': 0.9456229000211323, 'delta': 0.0026279652850863837}]
[{'weights': [2.515394649397849, -0.3391927502445985, -0.9671565426390275], 'output': 0.23648794202357587, 'delta': -0.04270059278364587}, {'weights': [-2.5584149848484263, 1.0036422106209202, 0.42383086467582715], 'output': 0.7790535202438367, 'delta': 0.03803132596437354}]


In [36]:
def predict(network, row):
    """Return the index of the output neuron with the largest value for ``row``.

    When several outputs tie for the maximum, the lowest index wins.
    """
    scores = forward_propagate(network, row)
    return max(range(len(scores)), key=scores.__getitem__)

In [37]:
# Compare the true class (last column) with the network's prediction for
# every row of the toy dataset. Indented with 4 spaces like the rest of the
# notebook (this cell previously used tabs).
for row in dataset:
    prediction = predict(network, row)
    print('Expected=%d, Got=%d' % (row[-1], prediction))

Expected=0, Got=0
Expected=0, Got=0
Expected=0, Got=0
Expected=0, Got=0
Expected=0, Got=0
Expected=1, Got=1
Expected=1, Got=1
Expected=1, Got=1
Expected=1, Got=1
Expected=1, Got=1


In [38]:
def back_propagation(train, test, l_rate, n_epoch, n_hidden):
    """Train a fresh network on ``train`` and return predictions for ``test``.

    Args:
        train: rows whose last element is an integer class label.
        test: rows to classify; their last element is not used as an input.
        l_rate: learning rate.
        n_epoch: number of training epochs.
        n_hidden: number of neurons in the hidden layer.

    Returns:
        A list with one predicted class index per row of ``test``.
    """
    n_inputs = len(train[0]) - 1
    labels = {row[-1] for row in train}
    # Labels are used as indices into the one-hot target vector, so the output
    # layer must reach the largest label. Counting distinct labels alone is too
    # small when a training fold lacks a class or labels are not contiguous
    # from 0, and training would then fail with an IndexError.
    n_outputs = max(len(labels), max(labels) + 1)
    network = initialize_network(n_inputs, n_hidden, n_outputs)
    train_network(network, train, l_rate, n_epoch, n_outputs)
    return [predict(network, row) for row in test]

- #### Wheat seeds dataset

In [39]:
def load_csv(filename):
    """Read a CSV file into a list of rows, skipping empty lines.

    Args:
        filename: path of the CSV file.

    Returns:
        A list of rows, each a list of the raw string fields.
    """
    # The csv module requires newline='' so that it handles line endings
    # itself; otherwise newlines embedded in quoted fields are altered.
    with open(filename, 'r', newline='') as file:
        return [row for row in reader(file) if row]

def str_column_to_float(dataset,column):
    """Convert the string at index ``column`` of every row to a float, in place.

    Surrounding whitespace is stripped before conversion.
    """
    for record in dataset:
        text = record[column]
        record[column] = float(text.strip())
        
def str_column_to_int(dataset,column):
    """Replace the class values in ``column`` with integer ids, in place.

    Ids start at zero and follow the order in which each distinct value first
    appears in ``dataset``. This makes the mapping the same on every run,
    unlike iterating over a ``set`` of strings.

    Args:
        dataset: list of rows (lists); modified in place.
        column: index of the column holding the class values.

    Returns:
        The ``{original_value: integer_id}`` mapping that was applied.
    """
    # dict.fromkeys removes duplicates while keeping first-seen order.
    distinct = dict.fromkeys(row[column] for row in dataset)
    lookup = {value: i for i, value in enumerate(distinct)}
    for row in dataset:
        row[column] = lookup[row[column]]
    return lookup

def dataset_minmax(dataset):
    """Return ``[min, max]`` for every column of ``dataset``, in column order."""
    bounds = []
    # zip(*dataset) transposes the rows into columns.
    for column in zip(*dataset):
        bounds.append([min(column), max(column)])
    return bounds

def normalize_dataset(dataset, minmax):
    """Rescale every column except the last to the range [0, 1], in place.

    Args:
        dataset: list of rows; the last element of each row is left untouched.
        minmax: per-column ``[min, max]`` pairs, e.g. from ``dataset_minmax``.

    A column whose min equals its max carries no information and would
    otherwise cause a division by zero; its values are set to 0.0.
    """
    for row in dataset:
        for i in range(len(row)-1):
            low = minmax[i][0]
            span = minmax[i][1] - low
            row[i] = (row[i] - low)/span if span else 0.0
            
def cross_validation_split(dataset, n_folds):
    """Randomly split ``dataset`` into ``n_folds`` folds of equal size.

    Rows are drawn without replacement using ``randrange``. Each fold gets
    ``int(len(dataset) / n_folds)`` rows, so leftover rows are not assigned
    to any fold. ``dataset`` itself is not modified.
    """
    pool = list(dataset)  # shallow copy to draw from
    rows_per_fold = int(len(dataset) / n_folds)
    folds = []
    for _ in range(n_folds):
        folds.append([pool.pop(randrange(len(pool))) for _ in range(rows_per_fold)])
    return folds

def accuracy_metric(actual, predicted):
    """Return the percentage of positions where ``predicted`` equals ``actual``.

    Args:
        actual: sequence of true labels.
        predicted: sequence of predicted labels, indexed like ``actual``.

    Returns:
        A float between 0.0 and 100.0. An empty ``actual`` yields 0.0 instead
        of raising ZeroDivisionError.
    """
    total = len(actual)
    if total == 0:
        return 0.0
    correct = sum(1 for i in range(total) if actual[i] == predicted[i])
    return correct/float(total)*100.0

def evaluate_algorithm(dataset, algorithm, n_folds, **kwargs):
    """Score ``algorithm`` with k-fold cross-validation.

    Each fold is held out once. ``algorithm`` is called as
    ``algorithm(train_set, test_set, **kwargs)``, where the test rows are
    copies with their last element replaced by None, and must return one
    prediction per test row.

    Returns:
        A list with one accuracy percentage per fold.
    """
    folds = cross_validation_split(dataset, n_folds)
    scores = []
    for held_out in folds:
        remaining = list(folds)
        remaining.remove(held_out)
        # Flatten the remaining folds into one list of rows: [[row1], [row2], ...]
        train_set = [row for other in remaining for row in other]

        # Copy the held-out rows and blank out the label in each copy.
        test_set = []
        for row in held_out:
            masked = list(row)
            masked[-1] = None
            test_set.append(masked)

        predicted = algorithm(train_set, test_set, **kwargs)
        actual = [row[-1] for row in held_out]
        scores.append(accuracy_metric(actual, predicted))
    return scores

In [40]:
# Re-seed inside the cell so the random draws used for fold assignment and
# weight initialisation do not depend on which cells ran before, or on how
# many times this cell has been run.
seed(1)

filename= 'wheat-seeds.csv'
dataset = load_csv(filename)

# All columns but the last are numeric features; the last is the class label.
for i in range(len(dataset[0])-1):
    str_column_to_float(dataset, i)
str_column_to_int(dataset, len(dataset[0])-1)
# Scale every feature column to [0, 1].
minmax = dataset_minmax(dataset)
normalize_dataset(dataset, minmax)

n_folds = 5
l_rate = 0.3
n_epoch = 500
n_hidden = 5

scores = evaluate_algorithm(dataset, back_propagation, n_folds, l_rate=l_rate, n_epoch=n_epoch, n_hidden=n_hidden)
print(scores)
print(f'mean accuracy:{sum(scores)/float(len(scores))}')

[88.09523809523809, 100.0, 92.85714285714286, 100.0, 92.85714285714286]
mean accuracy:94.76190476190477


In [234]:
# Same experiment with a smaller hidden layer (n_hidden = 2).
# Re-seed inside the cell so the random draws used for fold assignment and
# weight initialisation do not depend on which cells ran before, or on how
# many times this cell has been run.
seed(1)

filename= 'wheat-seeds.csv'
dataset = load_csv(filename)

# All columns but the last are numeric features; the last is the class label.
for i in range(len(dataset[0])-1):
    str_column_to_float(dataset, i)
str_column_to_int(dataset, len(dataset[0])-1)
# Scale every feature column to [0, 1].
minmax = dataset_minmax(dataset)
normalize_dataset(dataset, minmax)

n_folds = 5
l_rate = 0.3
n_epoch = 500
n_hidden = 2

scores = evaluate_algorithm(dataset, back_propagation, n_folds, l_rate=l_rate, n_epoch=n_epoch, n_hidden=n_hidden)
print(scores)
print(f'mean accuracy:{sum(scores)/float(len(scores))}')

[83.33333333333334, 76.19047619047619, 83.33333333333334, 95.23809523809523, 90.47619047619048]
mean accuracy:85.71428571428571


In [41]:
# Display the module-level `network`. back_propagation builds its own local
# network, so this shows whatever earlier cells assigned to the global name.
network

[[{'weights': [-1.4688375095432327, 1.850887325439514, 1.0858178629550297],
   'output': 0.024322537679354106,
   'delta': -0.0059546604162323625},
  {'weights': [0.37711098142462157, -0.0625909894552989, 0.2765123702642716],
   'output': 0.9502996684842616,
   'delta': 0.0026279652850863837}],
 [{'weights': [2.515394649397849, -0.3391927502445985, -0.9671565426390275],
   'output': 0.22647726109932073,
   'delta': -0.04270059278364587},
  {'weights': [-2.5584149848484263, 1.0036422106209202, 0.42383086467582715],
   'output': 0.7884094596056147,
   'delta': 0.03803132596437354}]]