In [1]:
from random import seed, randrange
from csv import reader
from functools import reduce


seed(42)


def load_csv(filepath):
  """Read a CSV file into a list of rows, skipping blank lines.

  Args:
    filepath: Path of the CSV file to read.

  Returns:
    A list with one entry per non-empty CSV record; each entry is the list
    of string fields produced by ``csv.reader`` for that record.

  Raises:
    OSError: If the file cannot be opened.
  """
  # newline="" hands the raw line endings to the csv module, as its
  # documentation requires; without it, "\r\n" embedded in a quoted field is
  # silently rewritten to "\n" before the reader sees it.
  with open(filepath, "r", newline="") as file:
    # Blank lines come back from the reader as empty lists; drop them.
    return [row for row in reader(file) if row]


def str_col_to_float(dataset, col):
  """Convert column ``col`` of every row from string to float, in place.

  Leading and trailing whitespace is stripped from each field before it is
  passed to ``float``. The rows of ``dataset`` are modified directly and
  nothing is returned.

  Args:
    dataset: Iterable of mutable rows whose ``col`` entry is a string.
    col: Index of the column to convert.
  """
  for record in dataset:
    text = record[col]
    record[col] = float(text.strip())


def train_test_split(dataset, split=0.6):
  """Randomly partition ``dataset`` into a training list and a test list.

  Rows are moved one at a time from a copy of ``dataset`` into the training
  list, each chosen with ``randrange`` from the module-level random
  generator, until the training list holds ``len(dataset) * split`` rows
  (rounded up when that product is not a whole number).

  Args:
    dataset: Sequence of rows. It is not modified; the returned lists hold
      the same row objects (the copy is shallow).
    split: Fraction of the rows, within [0, 1], to place in the training
      list.

  Returns:
    A ``(train, test)`` tuple of lists that together contain every row of
    ``dataset`` exactly once.

  Raises:
    ValueError: If ``split`` is outside [0, 1].
  """
  # A fraction above 1 would drain the test list and then fail inside
  # randrange with an unrelated-looking "empty range" error; reject it early.
  if not 0.0 <= split <= 1.0:
    raise ValueError(f"split must be within [0, 1], got {split!r}")

  train = []
  test = list(dataset)

  # Rounding strips binary floating-point noise from the product: e.g.
  # 100 * 0.07 evaluates to 7.000000000000001, which would otherwise move an
  # eighth row into the training list. Genuine fractions (460.8) are kept,
  # so the loop still rounds those up.
  train_size = round(len(dataset) * split, 9)

  while len(train) < train_size:
    idx = randrange(len(test))
    train.append(test.pop(idx))

  return train, test


def accuracy_metric(actual, predicted):
  """Return the percentage of positions where ``predicted`` equals ``actual``.

  Args:
    actual: Sequence of true values.
    predicted: Sequence of predicted values, the same length as ``actual``.

  Returns:
    A float between 0.0 and 100.0.

  Raises:
    AssertionError: If the two sequences differ in length.
    ZeroDivisionError: If the sequences are empty.
  """
  assert len(actual) == len(predicted)

  total = len(actual)
  hits = sum(1 for truth, guess in zip(actual, predicted) if truth == guess)

  return (hits / total) * 100.0

In [17]:
def evaluate_algorithm(dataset, algorithm, split, *args):
  """Score ``algorithm`` on a single random train/test split of ``dataset``.

  The dataset is divided with ``train_test_split``. The algorithm receives
  the training rows and copies of the test rows whose last element has been
  replaced by ``None``, so it cannot read the value it is asked to predict.
  Its predictions are compared with the real last elements of the test rows
  using ``accuracy_metric``.

  Args:
    dataset: Rows whose last element is the value to predict.
    algorithm: Callable invoked as ``algorithm(train, test_set, *args)`` and
      returning one prediction per test row.
    split: Training fraction forwarded to ``train_test_split``.
    *args: Extra positional arguments forwarded to ``algorithm``.

  Returns:
    The value returned by ``accuracy_metric`` for this split.
  """
  def _hide_label(row):
    # Copy first so the row kept in ``test`` still carries its real label.
    masked = row.copy()
    masked[-1] = None
    return masked

  train, test = train_test_split(dataset, split)
  unlabeled_test = [_hide_label(row) for row in test]

  predicted = algorithm(train, unlabeled_test, *args)
  actual = [row[-1] for row in test]

  return accuracy_metric(actual, predicted)


def zero_rule_algorithm_classification(train, test):
  """Zero Rule baseline: predict the most frequent training label everywhere.

  The label of a row is its last element. When several labels share the
  highest count, the one that appears first in ``train`` is chosen, so the
  result does not depend on set/hash iteration order.

  Args:
    train: Non-empty sequence of rows whose last element is a hashable label.
    test: Sequence of rows to predict for; only its length is used.

  Returns:
    A list of ``len(test)`` copies of the most frequent training label.

  Raises:
    ValueError: If ``train`` is empty.
  """
  # Single pass over the training rows; the dict keeps labels in first-seen
  # order, which is what makes the tie-break below deterministic.
  label_counts = {}
  for row in train:
    label = row[-1]
    label_counts[label] = label_counts.get(label, 0) + 1

  if not label_counts:
    raise ValueError("train must contain at least one row")

  # max() returns the first maximal key in iteration (insertion) order.
  prediction = max(label_counts, key=label_counts.get)
  return [prediction] * len(test)

In [26]:
# Load the raw CSV; every field comes back from load_csv as a string.
filepath = "../datasets/pima-indians-diabetes.csv"
dataset = load_csv(filepath=filepath)

# Convert every column to float in place. The column count is taken from the
# first row, and the last column is the one evaluate_algorithm treats as the
# value to predict.
for i in range(len(dataset[0])):
  str_col_to_float(dataset=dataset, col=i)

# Zero Rule baseline scored on one random split with a 0.6 training fraction.
# NOTE: the split draws from the global random generator that is seeded once
# in the first cell, so re-running only this cell (without re-running the
# seed call) produces a different split and possibly a different accuracy.
accuracy = evaluate_algorithm(dataset=dataset, algorithm=zero_rule_algorithm_classification, split=0.6)
# display() is given a plain str, so the cell output shows it with quotes.
display(f"accuracy: {accuracy:.2f}%")

'accuracy: 67.10%'