In [5]:
from random import seed, randrange
from csv import reader


# Seed the `random` module's shared generator so that the randrange-based
# fold assignment in cross_validation_split is repeatable from this point on.
seed(42)


def load_csv(filepath):
  """Read a CSV file into a list of rows, skipping blank lines.

  Args:
    filepath: Path of the CSV file to read.

  Returns:
    A list with one entry per non-empty row; each entry is the list of
    string fields produced by `csv.reader` (no type conversion is done).

  Raises:
    OSError: If the file cannot be opened.
  """
  # newline="" leaves line-ending handling to the csv module, as its
  # documentation requires. Without it, universal-newlines translation
  # rewrites "\r\n" inside quoted fields to "\n".
  with open(filepath, "r", newline="") as file:
    # Blank lines are parsed as empty lists, which are falsy and dropped.
    return [row for row in reader(file) if row]


def str_col_to_float(dataset, col):
  """Convert one column of string values to floats, in place.

  Args:
    dataset: List of rows (lists); each row is modified in place.
    col: Index of the column to convert.

  Returns:
    None. The conversion result is written back into `dataset`.

  Raises:
    ValueError: If a stripped value cannot be parsed as a float.
    AttributeError: If a value has no `strip` method (e.g. it is
      already a float).
  """
  for record in dataset:
    raw_value = record[col]
    # Surrounding whitespace is removed before parsing.
    record[col] = float(raw_value.strip())


def cross_validation_split(dataset, n_folds):
  """Randomly partition the rows of a dataset into equally sized folds.

  Rows are drawn without replacement using `randrange`, so the result
  depends on the state of the `random` module's generator.

  Args:
    dataset: List of rows. It is not modified; a shallow copy is consumed.
    n_folds: Number of folds to produce.

  Returns:
    A list of `n_folds` folds, each holding `len(dataset) // n_folds`
    rows. Rows left over when the length is not divisible by `n_folds`
    are not placed in any fold.
  """
  remaining = dataset.copy()
  rows_per_fold = len(remaining) // n_folds

  partitions = []
  for _ in range(n_folds):
    # One randrange call per drawn row; each draw shrinks the pool.
    partition = [
      remaining.pop(randrange(len(remaining)))
      for _draw in range(rows_per_fold)
    ]
    partitions.append(partition)

  return partitions


def accuracy_metric(actual, predicted):
  """Return the percentage of predictions that equal the actual values.

  Args:
    actual: Sequence of true values.
    predicted: Sequence of predicted values, the same length as `actual`.

  Returns:
    A float between 0.0 and 100.0.

  Raises:
    AssertionError: If the two sequences differ in length.
    ZeroDivisionError: If both sequences are empty.
  """
  total = len(actual)
  assert total == len(predicted)

  matches = sum(1 for truth, guess in zip(actual, predicted) if truth == guess)

  return (matches / total) * 100.0

In [17]:
def evaluate_algorithm(dataset, algorithm, n_folds, *args):
  """Score an algorithm with k-fold cross-validation.

  The dataset is split with `cross_validation_split`. Each fold is held
  out once while the rows of all other folds form the training set.

  Args:
    dataset: List of rows; the last element of each row is the label.
    algorithm: Callable invoked as `algorithm(train, test, *args)` that
      returns one prediction per test row.
    n_folds: Number of folds to split the dataset into.
    *args: Extra positional arguments forwarded to `algorithm`.

  Returns:
    A list with one `accuracy_metric` score per fold, in fold order.
  """
  folds = cross_validation_split(dataset=dataset, n_folds=n_folds)
  scores = []

  for held_out_idx, fold in enumerate(folds):
    # Pick training folds by position rather than with list.remove():
    # remove() matches by equality, so two folds with identical contents
    # would drop the wrong fold object and pass the held-out rows
    # themselves as training data. The comprehension also flattens in
    # linear time, unlike sum(list_of_lists, []).
    train = [
      row
      for idx, other_fold in enumerate(folds)
      if idx != held_out_idx
      for row in other_fold
    ]

    # Test rows are copies with the label masked, so the algorithm cannot
    # see the answer or alter the held-out fold.
    test = []
    for row in fold:
      _row = row.copy()
      _row[-1] = None
      test.append(_row)

    # Capture the true labels before the algorithm runs.
    actual = [row[-1] for row in fold]
    predicted = algorithm(train, test, *args)
    scores.append(accuracy_metric(actual=actual, predicted=predicted))

  return scores

def zero_rule_algorithm_classification(train, test):
  """Predict the most frequent training label for every test row.

  Args:
    train: List of rows; the last element of each row is its label.
      Labels must be hashable.
    test: List of rows to predict for; only its length is used.

  Returns:
    A list of `len(test)` copies of the most common training label. When
    several labels share the highest count, the one that appears first in
    `train` is chosen.

  Raises:
    ValueError: If `train` is empty.
  """
  # Count labels in one pass instead of calling list.count per distinct label.
  label_counts = {}
  for row in train:
    label = row[-1]
    label_counts[label] = label_counts.get(label, 0) + 1

  # max() returns the first maximal key and dicts iterate in insertion
  # order, so ties are broken by first appearance rather than by set
  # iteration order.
  prediction = max(label_counts, key=label_counts.get)

  return [prediction] * len(test)

In [27]:
# Load the raw rows; load_csv does no type conversion, so every field is a string.
filepath = "../datasets/pima-indians-diabetes.csv"
dataset = load_csv(filepath=filepath)

# Convert every column to float in place, including the last column, which
# the evaluation code treats as the class label.
for i in range(len(dataset[0])):
  str_col_to_float(dataset=dataset, col=i)

# Score the zero-rule baseline with 5-fold cross-validation.
scores = evaluate_algorithm(dataset=dataset, algorithm=zero_rule_algorithm_classification, n_folds=5)
# display() is given plain strings here; the recorded cell output shows them quoted.
display(f"Scores: {scores}")
display(f"Mean score: {(sum(scores) / len(scores)):.2f}%")

'Scores: [66.01307189542483, 66.01307189542483, 61.43790849673203, 62.745098039215684, 68.62745098039215]'

'Mean score: 64.97%'