In [52]:
from random import random, randrange, seed
from math import exp
from csv import reader


seed(42)


def load_csv(filepath):
  dataset = []

  with open(filepath, "r") as file:
    csv_reader = reader(file)
    for row in csv_reader:
      if row:
        dataset.append(row)
  
  return dataset


def str_col_to_float(dataset, col):
  for row in dataset:
    row[col] = float(row[col].strip())


def str_col_to_int(dataset, col):
  class_vals = set(list(map(lambda row: row[-1], dataset)))
  class_lookup = {}

  for i, val in enumerate(class_vals):
    class_lookup[val] = i
  for row in dataset:
    row[col] = class_lookup[row[col]]
  
  return class_lookup


def dataset_minmax(dataset):
  return [[min(col), max(col)] for col in zip(*dataset)]


def normalize_dataset(dataset):
  minmax = dataset_minmax(dataset=dataset)
  for row in dataset:
    for i in range(len(row) - 1):
      row[i] = (row[i] - minmax[i][0]) / (minmax[i][1] - minmax[i][0])


def cross_validation_split(dataset, n_folds):
  dataset_folds = []
  fold_size = int(len(dataset) / n_folds)
  dataset_ = dataset.copy()

  for _ in range(n_folds):
    fold = []
    while len(fold) < fold_size:
      fold.append(dataset_.pop(randrange(len(dataset_))))
    dataset_folds.append(fold)
  
  return dataset_folds


def accuracy_metric(actual, predicted):
  assert len(actual) == len(predicted)

  size = len(actual)
  correct_count = 0

  for i in range(size):
    correct_count += 1 if actual[i] == predicted[i] else 0

  return (correct_count / float(size)) * 100


def evaluate_algorithm(dataset, algorithm, n_folds, *args):
  folds = cross_validation_split(dataset=dataset, n_folds=n_folds)
  scores = []

  for fold in folds:
    train = folds.copy()
    train.remove(fold)
    train = sum(train, [])
    test = []
    for row in fold:
      row_ = row.copy()
      row_[-1] = None
      test.append(row_)
    predicted = algorithm(train, test, *args)
    actual = list(map(lambda row: row[-1], fold))
    accuracy = accuracy_metric(actual=actual, predicted=predicted)
    scores.append(accuracy)
  
  return scores


def initialize_network(n_inputs, n_hidden, n_outputs):
  network = []

  hidden_layer = [{'weights': [random() for _ in range(n_inputs + 1)]} for _ in range(n_hidden)]
  output_layer = [{'weights': [random() for _ in range(n_hidden + 1)]} for _ in range(n_outputs)]
  network.append(hidden_layer)
  network.append(output_layer)

  return network


def activate(weights, inputs):
  activation = weights[-1]

  for i in range(len(weights) - 1):
    activation += weights[i] * inputs[i]
  
  return activation


def transfer(activation):
  return 1.0 / (1 + exp(-activation))


def forward_propagate(network, row):
  inputs = row.copy()

  for layer in network:
    outputs = []
    for neuron in layer:
      activation = activate(weights=neuron['weights'], inputs=inputs)
      neuron['output'] = transfer(activation=activation)
      outputs.append(neuron['output'])
    inputs = outputs
  
  return inputs


def transfer_derivative(output):
  return output * (1.0 - output)


def backward_propagate_error(network, expected):
  for i in reversed(range(len(network))):
    layer = network[i]
    errors = []
    if i != len(network) - 1:
      for j in range(len(layer)):
        error = 0.0
        for neuron in network[i + 1]:
          error += neuron['weights'][j] * neuron['delta']
        errors.append(error)
    else:
      for j in range(len(layer)):
        neuron = layer[j]
        errors.append(expected[j] - neuron['output'])
    for j in range(len(layer)):
      neuron = layer[j]
      neuron['delta'] = errors[j] * transfer_derivative(output=neuron['output'])


def update_weights(network, row, l_rate):
  for i in range(len(network)):
    inputs = row[:-1]
    if i != 0:
      inputs = [neuron['output'] for neuron in network[i - 1]]
    for neuron in network[i]:
      for j in range(len(inputs)):
        neuron['weights'][j] += l_rate * neuron['delta'] * inputs[j]
      neuron['weights'][-1] += l_rate * neuron['delta']


def train_network(network, train, l_rate, n_epochs, n_outputs):
  for epoch in range(n_epochs):
    error_sum = 0.0
    for row in train:
      outputs = forward_propagate(network=network, row=row)
      expected = [0 for _ in range(n_outputs)]
      expected[row[-1]] = 1
      error_sum += sum([pow(expected[i] - outputs[i], 2) for i in range(len(expected))])
      backward_propagate_error(network=network, expected=expected)
      update_weights(network=network, row=row, l_rate=l_rate)
    print(f"epoch={epoch}; l_rate={l_rate}; error={error_sum}")


def predict(network, row):
  outputs = forward_propagate(network=network, row=row)

  return outputs.index(max(outputs))


def backpropagation(train, test, l_rate, n_epochs, n_hidden):
  n_inputs = len(dataset[0]) - 1
  n_outputs = len(set([row[-1] for row in dataset]))
  network = initialize_network(n_inputs=n_inputs, n_hidden=2, n_outputs=n_outputs)
  train_network(network=network, train=train, l_rate=0.5, n_epochs=20, n_outputs=n_outputs)
  predictions = []

  for row in test:
    predictions.append(predict(network=network, row=row))
  
  return predictions


filepath = "../datasets/seeds-dataset.csv"
dataset = load_csv(filepath=filepath)
for col in range(len(dataset[0])):
  str_col_to_float(dataset=dataset, col=col)
str_col_to_int(dataset=dataset, col=len(dataset[0]) - 1)
normalize_dataset(dataset=dataset)

n_folds = 5
l_rate = 0.3
n_epochs = 500
n_hidden = 5

scores = evaluate_algorithm(dataset, backpropagation, n_folds, l_rate, n_epochs, n_hidden)
display(f"scores={scores}")
display(f"mean accuracy={sum(scores) / len(scores)}")

epoch=0; l_rate=0.5; error=119.68360976585653
epoch=1; l_rate=0.5; error=107.34821018333568
epoch=2; l_rate=0.5; error=92.42803618409326
epoch=3; l_rate=0.5; error=77.5182984343216
epoch=4; l_rate=0.5; error=69.10697030572342
epoch=5; l_rate=0.5; error=64.55113349846778
epoch=6; l_rate=0.5; error=61.658074980842535
epoch=7; l_rate=0.5; error=59.43414235366647
epoch=8; l_rate=0.5; error=57.346338938089005
epoch=9; l_rate=0.5; error=55.007791831702676
epoch=10; l_rate=0.5; error=52.10830075004687
epoch=11; l_rate=0.5; error=48.528638049981744
epoch=12; l_rate=0.5; error=44.521683294115945
epoch=13; l_rate=0.5; error=40.614650057931954
epoch=14; l_rate=0.5; error=37.21535903808653
epoch=15; l_rate=0.5; error=34.429869975571826
epoch=16; l_rate=0.5; error=32.18253040543778
epoch=17; l_rate=0.5; error=30.354625950103348
epoch=18; l_rate=0.5; error=28.841278499624355
epoch=19; l_rate=0.5; error=27.561947921828313
epoch=0; l_rate=0.5; error=119.22233955642992
epoch=1; l_rate=0.5; error=112.93

'scores=[76.19047619047619, 85.71428571428571, 90.47619047619048, 90.47619047619048, 95.23809523809523]'

'mean accuracy=87.61904761904762'