In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, precision_score, recall_score
import pandas as pd
import glob
import numpy as np
import plotly.express as px

In [None]:
# Collect the image file paths for each class and split.
# glob.glob returns matches in arbitrary (filesystem-dependent) order, so the
# listings are sorted: the seeded index sampling done on these lists then
# selects the same files on every machine.
paraTrain = sorted(glob.glob("data/training_set/Parasitized/*"))
noParaTrain = sorted(glob.glob("data/training_set/Uninfected/*"))
paraTest = sorted(glob.glob("data/testing_set/Parasitized/*"))
noParaTest = sorted(glob.glob("data/testing_set/Uninfected/*"))

In [None]:
def loadImageSample(paths, sampleSize):
    """Read a random sample of ``sampleSize`` images from the files in ``paths``.

    Indexes are drawn with the global NumPy random state. When ``paths`` holds
    at least ``sampleSize`` entries the draw is made without replacement, so no
    image is loaded twice; with fewer entries it falls back to sampling with
    replacement so that ``sampleSize`` images are still returned.

    Parameters:
        paths: list of image file paths.
        sampleSize: number of images to load.

    Returns:
        A list of ``sampleSize`` arrays as returned by ``mpimg.imread``.

    Raises:
        ValueError: (from NumPy) if ``paths`` is empty.
    """
    indexes = np.random.choice(
        len(paths), size=sampleSize, replace=len(paths) < sampleSize
    )
    return [mpimg.imread(paths[i]) for i in indexes]


# Seed once so the four draws below are repeatable.
np.random.seed(0)

# The images are stored under new names instead of overwriting the path lists
# (paraTrain, noParaTrain, ...), so this cell can be re-run without first
# re-running the cell that lists the files.
trainSize = 400
paraTrainImages = loadImageSample(paraTrain, trainSize)
noParaTrainImages = loadImageSample(noParaTrain, trainSize)
trainData = paraTrainImages + noParaTrainImages
# Labels follow the concatenation order: parasitized first, then uninfected.
trainLabels = ["Sick"]*trainSize + ["Healthy"]*trainSize

testSize = 80
paraTestImages = loadImageSample(paraTest, testSize)
noParaTestImages = loadImageSample(noParaTest, testSize)
testData = paraTestImages + noParaTestImages
testLabels = ["Sick"]*testSize + ["Healthy"]*testSize

In [None]:
def intensityHistogram(image, bins=20):
    """Return the normalised intensity histogram of an image's non-zero values.

    The image is flattened (all channels pooled together), zero values are
    discarded, and the remaining values are counted into ``bins`` equal-width
    bins. Counts are divided by the number of non-zero values.

    The bin edges are fixed instead of being derived from each image's own
    minimum/maximum, so that bin ``j`` covers the same intensity interval for
    every image and the resulting vectors can be compared by a distance-based
    classifier. Integer images use ``[0, max of dtype]``; other images use
    ``[0, 1]`` (matplotlib's imread documents PNG data as floats in 0-1 --
    NOTE(review): confirm no float images outside that range are fed in, as
    values outside the range are not counted).

    Parameters:
        image: array of pixel values.
        bins: number of histogram bins.

    Returns:
        1-D float array of length ``bins``; all zeros if the image has no
        non-zero values.
    """
    pixels = image.flatten()
    pixels = pixels[pixels != 0]
    if np.issubdtype(pixels.dtype, np.integer):
        valueRange = (0, np.iinfo(pixels.dtype).max)
    else:
        valueRange = (0.0, 1.0)
    counts = np.histogram(pixels, bins=bins, range=valueRange)[0]
    # An all-zero image leaves no pixels; divide by 1 so the feature vector is
    # zeros rather than NaN from a division by zero.
    return counts / float(max(len(pixels), 1))


# One feature vector per image, in the same order as the label lists.
trainHist = [intensityHistogram(image) for image in trainData]
testHist = [intensityHistogram(image) for image in testData]

In [None]:
# Sweep odd K values from 1 to 21 (odd presumably to avoid tied votes between
# the two classes), report metrics for each K and remember the K with the
# highest macro-averaged F1.
# NOTE(review): K is selected using the test set, so the reported best F1 is an
# optimistic estimate; a separate validation split would be needed for an
# unbiased figure.
precision_scores = []
recall_scores = []
bestK = 0
bestF1 = 0
bestConfMat = []
# Unrounded F1 of the current best model. Selection compares unrounded scores
# so that rounding to 3 decimals cannot decide which K wins.
bestF1Exact = 0
for k in range(1, 22, 2):
    print("K = ", k)
    model = KNeighborsClassifier(n_neighbors=k)
    model.fit(trainHist, trainLabels)
    predictions = model.predict(testHist)

    # Metrics are rounded to 3 decimals for display and plotting only.
    accuracy = round(accuracy_score(testLabels, predictions), 3)
    recall = round(recall_score(testLabels, predictions, average='macro'), 3)
    precision = round(precision_score(testLabels, predictions, average='macro'), 3)
    f1Exact = f1_score(testLabels, predictions, average='macro')
    f1 = round(f1Exact, 3)
    confMat = confusion_matrix(testLabels, predictions)

    # Collected per K for the precision/recall plot.
    precision_scores.append(precision)
    recall_scores.append(recall)

    # Strict '>' keeps the smallest K when several K values score the same.
    if f1Exact > bestF1Exact:
        bestF1Exact = f1Exact
        bestF1 = f1
        bestK = k
        bestConfMat = confMat

    print(f"Accuracy: {accuracy}")
    print(f"Precision: {precision}")
    print(f"Recall: {recall}")
    print(f"F1: {f1}")
    print("Confusion Matrix: \n", confMat)
    print("\n")

In [None]:
# Summary of the K sweep: the K with the highest F1, its F1 score and its
# confusion matrix. bestConfMat is recorded during the sweep for exactly this
# purpose but was never shown.
print("------------------->Best model<-------------------")
print("K = ", bestK)
print(f"F1: {bestF1}")
print("Confusion Matrix: \n", bestConfMat)
print("\n")

In [None]:
# Plot precision against recall, one point per K, labelling each point with K.
# k_values must list the same K values, in the same order, as the sweep that
# filled recall_scores and precision_scores.
k_values = list(range(1, 22, 2))
df = pd.DataFrame(
    [
        (recall, precision, k)
        for recall, precision, k in zip(recall_scores, precision_scores, k_values)
    ],
    columns=["Recall", "Precision", "K"],
)
fig = px.line(
    df,
    x="Recall",
    y="Precision",
    text="K",
    title="Precision vs Recall vs K",
)
fig.show()