In [None]:
import sys
# Stop the interpreter from writing .pyc / __pycache__ files for modules
# imported after this point (set before the local `modules` imports below).
sys.dont_write_bytecode = True

from modules.point import PointDataset
from modules.classifier import ConvexHullClassifier, KNNClassifier
from random import choices
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Load the banana dataset (relative path) and pull out the points that the
# classifier cells below take as input.
# NOTE(review): the element type / label layout of `points` is defined in
# modules.point and is not visible here — confirm before relying on it.
dataset = PointDataset("./datasets/banana.dat")
points = dataset.getPoints()

In [None]:
# Build the convex-hull classifier on the full point set and run it.
# NOTE(review): the meaning of the leading 0 and 1 arguments is not visible
# here (possibly the two class labels) — confirm in modules.classifier.
CHclf = ConvexHullClassifier(0, 1, points)
# The object returned by run() is indexed in later cells by
# "Cross Validation Metrics" and "Test Metrics".
CHresults = CHclf.run()

In [None]:
# Display the "Cross Validation Metrics" entry of the convex-hull run.
CHresults["Cross Validation Metrics"]

In [None]:
# Display the "Test Metrics" entry of the convex-hull run.
CHresults["Test Metrics"]

In [None]:
# Build the k-nearest-neighbours classifier on the same full point set and
# run it.
# NOTE(review): maxK=30 is presumably the largest k considered by the
# classifier — confirm in modules.classifier.
KNNclf = KNNClassifier(points, maxK=30)
# The object returned by run() is indexed in later cells by
# "Cross Validation Metrics" and "Test Metrics".
KNNresults = KNNclf.run()

In [None]:
# Display the "Cross Validation Metrics" entry of the KNN run.
KNNresults["Cross Validation Metrics"]

In [None]:
# Display the "Test Metrics" entry of the KNN run.
KNNresults["Test Metrics"]

In [None]:
# Resampling comparison of the two classifiers: repeatedly draw a sample (with
# replacement) from `points`, run both classifiers on that sample, and record
# the KNN-minus-ConvexHull difference of the mean test metrics.
N_RESAMPLES = 100    # number of resampling rounds
SAMPLE_SIZE = 1000   # points drawn (with replacement) per round

# One Series of per-metric differences per round. They are collected in a list
# so the frame is assembled once, instead of being re-copied by pd.concat on
# every iteration.
roundDiffs = []

for _ in range(N_RESAMPLES):
    pointSample = choices(points, k=SAMPLE_SIZE)

    # Loop-local names on purpose: the notebook-level CHclf / CHresults /
    # KNNclf / KNNresults from the full-dataset runs must stay intact so the
    # cells that display them still work after this cell has been executed.
    sampleCHMetrics = ConvexHullClassifier(0, 1, pointSample).run()["Test Metrics"]
    sampleKNNMetrics = KNNClassifier(pointSample, maxK=30).run()["Test Metrics"]

    roundDiffs.append(sampleKNNMetrics.mean() - sampleCHMetrics.mean())

# After the concat every column is one round; transpose so that each row is a
# round and each column is one of the averaged metrics.
meanDiff = pd.concat(roundDiffs, ignore_index=True, axis=1).transpose()

In [None]:
# Histogram of the per-round metric differences stored in meanDiff, one panel
# per metric, each with a red reference line at zero difference.
fig, ax = plt.subplots(2, 2, figsize=(16, 10))

# ax.flat walks the 2x2 grid row by row, so the metrics land in the same
# panels as before: Precision, Recall on top; F1, Accuracy below.
for panel, metric in zip(ax.flat, ["Precision", "Recall", "F1", "Accuracy"]):
    panel.hist(meanDiff[metric], bins=20, label=f"{metric} mean difference", color='b')
    panel.axvline(x=0, color='r')  # zero line: both classifiers' means are equal here
    panel.set_xlabel(f"{metric} mean difference (KNN - Convex Hull)")
    panel.set_ylabel("Count")
    panel.legend()