In [43]:
from csv import reader
from random import seed, randrange


# Seed the module-level random generator so that the randrange() draws used
# when assigning rows to cross-validation folds repeat from run to run.
seed(42)


def load_csv(filepath):
  """Read a CSV file into a list of rows, skipping empty rows.

  Args:
    filepath: Path of the CSV file to read.

  Returns:
    A list with one entry per non-empty row; each entry is the list of
    string fields produced by ``csv.reader`` for that row.
  """
  # newline="" leaves line-ending handling to the csv module (as its
  # documentation requires); without it, "\r\n" sequences embedded in quoted
  # fields are rewritten by universal-newline translation before parsing.
  with open(filepath, "r", newline="") as file:
    return [row for row in reader(file) if row]


def str_col_to_float(dataset, col):
  """Convert one column of string values to floats, in place.

  Args:
    dataset: List of rows (mutable sequences); modified in place.
    col: Index of the column to convert. Each value is stripped of
      surrounding whitespace before being passed to ``float``.
  """
  for record in dataset:
    raw_value = record[col]
    record[col] = float(raw_value.strip())


def str_col_to_int(dataset, col):
  """Replace the class labels in one column with integer codes, in place.

  Codes are assigned in sorted label order (0 for the smallest label, 1 for
  the next, ...). Sorting makes the mapping reproducible: iterating a set of
  strings directly yields an order that can change between interpreter runs
  because of hash randomization.

  Args:
    dataset: List of rows (mutable sequences); modified in place.
    col: Index of the column holding the class labels. The distinct labels
      must be hashable and mutually orderable (e.g. all strings).
  """
  distinct_labels = sorted({row[col] for row in dataset})
  class_idx_map = {label: idx for idx, label in enumerate(distinct_labels)}

  for row in dataset:
    row[col] = class_idx_map[row[col]]


def cross_validation_split(dataset, n_folds):
  """Randomly partition rows into ``n_folds`` folds of equal size.

  Rows are drawn without replacement from a shallow copy, so ``dataset``
  itself is left untouched. Each fold receives
  ``int(len(dataset) / n_folds)`` rows; any leftover rows are not placed in
  a fold.

  Args:
    dataset: List of rows to split.
    n_folds: Number of folds to build.

  Returns:
    A list of ``n_folds`` lists of rows.
  """
  remaining = dataset.copy()
  rows_per_fold = int(len(dataset) / n_folds)
  partitions = []

  for _ in range(n_folds):
    # Each pop removes the drawn row, so no row lands in two folds.
    partitions.append(
      [remaining.pop(randrange(len(remaining))) for _ in range(rows_per_fold)]
    )

  return partitions


def accuracy_metric(actual, predicted):
  """Return the percentage of positions where ``predicted`` equals ``actual``.

  Args:
    actual: Sequence of true labels.
    predicted: Sequence of predicted labels; must have the same length as
      ``actual`` (checked with an assertion).

  Returns:
    A float between 0 and 100.
  """
  assert len(actual) == len(predicted)

  hits = sum(1 for truth, guess in zip(actual, predicted) if truth == guess)
  return (hits / float(len(actual))) * 100


def evaluate_algorithm(dataset, algorithm, n_folds, *args):
  """Score an algorithm with k-fold cross-validation.

  Each fold in turn is held out as the test set while the rows of all the
  other folds, concatenated in fold order, form the training set.

  Args:
    dataset: List of rows; the last element of each row is the label.
    algorithm: Callable invoked as ``algorithm(train, test, *args)`` that
      returns one prediction per test row.
    n_folds: Number of cross-validation folds.
    *args: Extra positional arguments forwarded to ``algorithm``.

  Returns:
    A list with one accuracy percentage per fold.
  """
  folds = cross_validation_split(dataset=dataset, n_folds=n_folds)
  scores = []

  for held_out_idx, fold in enumerate(folds):
    # Exclude the held-out fold by position. list.remove() matches by
    # equality, so it could drop a different fold with equal contents, and
    # sum(train, []) re-copies the accumulated list for every fold.
    train = []
    for idx, other_fold in enumerate(folds):
      if idx != held_out_idx:
        train.extend(other_fold)

    # Hide the label from the algorithm on copies of the test rows.
    test = []
    for row in fold:
      masked_row = row.copy()
      masked_row[-1] = None
      test.append(masked_row)

    predicted = algorithm(train, test, *args)
    actual = [row[-1] for row in fold]
    scores.append(accuracy_metric(actual=actual, predicted=predicted))

  return scores


def predict(row, weights):
  """Classify a row with a weighted sum followed by a step function.

  Args:
    row: Sequence whose last element is the label slot (ignored here) and
      whose other elements are the input values.
    weights: Sequence where ``weights[0]`` is the bias and ``weights[i + 1]``
      multiplies ``row[i]``.

  Returns:
    1 if the activation is greater than or equal to zero, otherwise 0.
  """
  activation = weights[0]

  # Start the index at 1 because slot 0 of the weights holds the bias.
  for weight_idx, feature in enumerate(row[:-1], start=1):
    activation += weights[weight_idx] * feature

  if activation >= 0:
    return 1
  return 0


def train_weights(train, l_rate, n_epochs):
  """Fit perceptron weights by updating after every training row.

  Weights start at zero. For each row the prediction error
  (label minus prediction) scaled by ``l_rate`` is added to the bias and,
  multiplied by each input value, to the matching weight. The summed squared
  error of every epoch is printed.

  Args:
    train: List of rows whose last element is the numeric label.
    l_rate: Learning rate applied to every update.
    n_epochs: Number of passes over ``train``.

  Returns:
    The list of weights, with the bias at index 0.
  """
  weights = [0.0 for _ in train[0]]

  for epoch in range(n_epochs):
    sum_sq_error = 0.0
    for sample in train:
      delta = sample[-1] - predict(sample, weights)
      sum_sq_error += pow(delta, 2)
      step = l_rate * delta
      weights[0] = weights[0] + step
      for weight_idx, feature in enumerate(sample[:-1], start=1):
        weights[weight_idx] = weights[weight_idx] + step * feature
    print(f"epoch={epoch}; l_rate={l_rate}; error={sum_sq_error}")

  return weights


def perceptron(train, test, l_rate, n_epochs):
  """Train a perceptron on ``train`` and predict a label for each test row.

  Args:
    train: Rows used to fit the weights.
    test: Rows to classify.
    l_rate: Learning rate passed to ``train_weights``.
    n_epochs: Number of training epochs passed to ``train_weights``.

  Returns:
    A list with one prediction per row of ``test``, in order.
  """
  fitted = train_weights(train=train, l_rate=l_rate, n_epochs=n_epochs)
  return [predict(row=sample, weights=fitted) for sample in test]


# Evaluation settings: fold count and perceptron hyperparameters.
n_folds = 3
l_rate = 0.01
n_epochs = 1000

# Load the raw rows; every field is still a string at this point.
filepath = "../datasets/sonar.csv"
dataset = load_csv(filepath=filepath)

# All columns but the last are converted to floats; the last column holds
# the class label and is converted to integer codes.
label_col = len(dataset[0]) - 1
for i in range(label_col):
  str_col_to_float(dataset=dataset, col=i)
str_col_to_int(dataset=dataset, col=label_col)

# Cross-validate the perceptron and show per-fold and mean accuracy.
scores = evaluate_algorithm(dataset, perceptron, n_folds, l_rate, n_epochs)
display(f"scores={scores}")
display(f"mean accuracy={sum(scores) / len(scores)}")

epoch=0; l_rate=0.01; error=61.0
epoch=1; l_rate=0.01; error=55.0
epoch=2; l_rate=0.01; error=53.0
epoch=3; l_rate=0.01; error=46.0
epoch=4; l_rate=0.01; error=45.0
epoch=5; l_rate=0.01; error=52.0
epoch=6; l_rate=0.01; error=45.0
epoch=7; l_rate=0.01; error=41.0
epoch=8; l_rate=0.01; error=42.0
epoch=9; l_rate=0.01; error=44.0
epoch=10; l_rate=0.01; error=39.0
epoch=11; l_rate=0.01; error=38.0
epoch=12; l_rate=0.01; error=44.0
epoch=13; l_rate=0.01; error=33.0
epoch=14; l_rate=0.01; error=40.0
epoch=15; l_rate=0.01; error=39.0
epoch=16; l_rate=0.01; error=43.0
epoch=17; l_rate=0.01; error=34.0
epoch=18; l_rate=0.01; error=38.0
epoch=19; l_rate=0.01; error=34.0
epoch=20; l_rate=0.01; error=40.0
epoch=21; l_rate=0.01; error=36.0
epoch=22; l_rate=0.01; error=36.0
epoch=23; l_rate=0.01; error=42.0
epoch=24; l_rate=0.01; error=34.0
epoch=25; l_rate=0.01; error=37.0
epoch=26; l_rate=0.01; error=40.0
epoch=27; l_rate=0.01; error=34.0
epoch=28; l_rate=0.01; error=43.0
epoch=29; l_rate=0.01; e

'scores=[75.36231884057972, 75.36231884057972, 63.76811594202898]'

'mean accuracy=71.4975845410628'