# Algorithm Comparison

This notebook compares K-Nearest Neighbors and Gaussian Naive Bayes on the iris dataset.

In [1]:
from src.knn import load_dataset, train_test_split, predict_classification, accuracy_metric

# Load the iris CSV and hold out a share of the rows for evaluation
# (test_ratio=0.2; seed=1 presumably fixes the shuffle -- confirm in src.knn).
dataset = load_dataset('../data/iris.csv')
train, test = train_test_split(dataset, test_ratio=0.2, seed=1)
# Classify every held-out row against the training rows. The third argument (3)
# is presumably the number of neighbours k -- confirm in src.knn.
predictions = [predict_classification(train, row, 3) for row in test]
# The last element of each row is used as the true label.
actual = [row[-1] for row in test]
knn_accuracy = accuracy_metric(actual, predictions)
print(f"KNN accuracy: {knn_accuracy:.2%}")


KNN accuracy: 100.00%

In [2]:
# Gaussian Naive Bayes implementation
import math
from collections import defaultdict

def summarize_dataset(dataset):
    """Compute per-class (mean, sample standard deviation) for each feature.

    Every row is unpacked as ``*features, label``: the last element is the
    class label and everything before it is treated as a numeric feature.

    Args:
        dataset: iterable of rows shaped ``[f0, f1, ..., label]``.

    Returns:
        dict mapping each label to a list holding one ``(mean, stdev)`` tuple
        per feature column, in column order. ``stdev`` uses the n-1 (sample)
        denominator. A class with a single row gets ``stdev == 0.0`` rather
        than raising ``ZeroDivisionError``.
    """
    rows_by_label = defaultdict(list)
    for row in dataset:
        *features, label = row
        rows_by_label[label].append(features)

    stats = {}
    for label, rows in rows_by_label.items():
        column_stats = []
        # zip(*rows) transposes the class's rows into per-feature columns.
        for col in zip(*rows):
            count = len(col)
            # Compute the mean once per column, not once per value.
            mean = sum(col) / count
            if count < 2:
                # The sample variance is undefined for a single observation;
                # report zero spread so calculate_probability's stdev == 0
                # branch handles it.
                stdev = 0.0
            else:
                stdev = math.sqrt(sum((x - mean) ** 2 for x in col) / (count - 1))
            column_stats.append((mean, stdev))
        stats[label] = column_stats
    return stats

def calculate_probability(x, mean, stdev):
    """Evaluate the Gaussian probability density at ``x``.

    Args:
        x: value to score.
        mean: mean of the distribution.
        stdev: standard deviation of the distribution.

    Returns:
        The normal density ``N(x; mean, stdev**2)``. When ``stdev`` is zero
        the distribution is treated as a point mass: 1.0 if ``x`` equals
        ``mean``, otherwise 0.0.
    """
    if stdev == 0:
        # Degenerate distribution: all mass sits exactly on the mean.
        if x == mean:
            return 1.0
        return 0.0

    squared_distance = (x - mean) ** 2
    twice_variance = 2 * stdev ** 2
    decay = math.exp(-(squared_distance / twice_variance))
    normalizer = 1 / (math.sqrt(2 * math.pi) * stdev)
    return normalizer * decay

def calculate_class_probabilities(summaries, row):
    """Score ``row`` against every class as a product of feature densities.

    Args:
        summaries: dict mapping label -> list of ``(mean, stdev)`` tuples,
            one per feature column.
        row: sequence whose element ``i`` is the value of feature ``i``.

    Returns:
        dict mapping each label to the product of ``calculate_probability``
        over its features. The product starts from 1, so no class prior is
        applied and a class with no feature summaries scores 1.
    """
    scores = {}
    for label in summaries:
        likelihood = 1
        for index, feature_stats in enumerate(summaries[label]):
            mu, sigma = feature_stats
            likelihood = likelihood * calculate_probability(row[index], mu, sigma)
        scores[label] = likelihood
    return scores

def predict_nb(summaries, row):
    """Return the label whose class score for ``row`` is highest.

    Args:
        summaries: per-class feature statistics, passed straight through to
            ``calculate_class_probabilities``.
        row: the sample to classify.

    Returns:
        The best-scoring label. On a tie the label seen first in iteration
        order wins; ``None`` is returned when there are no classes.
    """
    scores = calculate_class_probabilities(summaries, row)
    winner = None
    top_score = -1
    for candidate in scores:
        score = scores[candidate]
        # Skip unless this is the first candidate or a strict improvement.
        if winner is not None and not score > top_score:
            continue
        top_score = score
        winner = candidate
    return winner

# Fit per-class feature statistics on the training rows from cell 1, then
# score the same held-out rows (and the same `actual` labels) used for KNN.
summaries = summarize_dataset(train)
nb_predictions = [predict_nb(summaries, row) for row in test]
nb_accuracy = accuracy_metric(actual, nb_predictions)
print(f"Naive Bayes accuracy: {nb_accuracy:.2%}")


Naive Bayes accuracy: 100.00%