In [None]:
# Scratch check: how list slices behave around the midpoint of a short list.
l = [1, 2]
index = len(l) // 2

# First the elements before the midpoint, then the elements starting two
# positions past it (a slice that starts beyond the end is simply empty).
for part in (l[:index], l[index + 2:]):
    print(part)

### Auxiliary structures

In [None]:
def distance(p1, p2):
    """Return the squared Euclidean distance between two points.

    The sum runs over every position of ``p1`` and reads the same position
    from ``p2``. No square root is taken, so the value is the *squared*
    distance.

    Args:
        p1: indexable sequence of numbers; its length drives the loop.
        p2: indexable sequence with at least ``len(p1)`` numbers.

    Returns:
        The sum of squared coordinate differences (``0`` for empty ``p1``).
    """
    squared_sum = 0
    for axis in range(len(p1)):
        delta = p1[axis] - p2[axis]
        squared_sum += delta ** 2
    return squared_sum

def most_frequent(List):
    """Return the value that occurs most often in ``List``.

    Occurrences are tallied in a single pass. When several values share the
    highest count, the one that appears first in ``List`` wins, so the result
    is the same on every run.

    Args:
        List: iterable of hashable values.

    Returns:
        The most common value.

    Raises:
        ValueError: if ``List`` is empty.
    """
    counts = {}
    for item in List:
        counts[item] = counts.get(item, 0) + 1
    # dicts keep insertion order and max() returns the first maximal key it
    # meets, which yields the first-seen value among tied candidates.
    return max(counts, key=counts.get)

def sort_list_by_index(l, index):
    """Return a new list holding the items of ``l`` ordered by ``item[index]``.

    ``l`` itself is left untouched; items whose keys compare equal keep their
    original relative order.
    """
    def component(item):
        return item[index]

    ordered = list(l)
    ordered.sort(key=component)
    return ordered

class Node:
    """A single tree node: a data point, its level, and its two sides."""

    def __init__(self, data, lvl):
        # The point stored at this node and the level it was created for.
        self.data, self.lvl = data, lvl
        # Both sides start out as separate, empty lists.
        self.left_side, self.right_side = [], []

class KDTree:
    """k-d tree built from a collection of equally sized points.

    After ``fit`` the tree is reachable through ``self.root``. Every node
    holds its point in ``data``, its depth in ``lvl`` and its subtrees in
    ``left_side`` / ``right_side`` (each a ``Node``, or ``None`` when that
    side has no points).
    """

    def fit(self, data):
        """Build the tree from ``data``.

        Args:
            data: sequence of points; each point is an indexable sequence and
                the length of the first point becomes ``self.dimensions``.

        Sets ``self.dimensions`` and ``self.root``. With no points at all,
        ``dimensions`` is ``0`` and ``root`` is ``None``.
        """
        if len(data) == 0:
            self.dimensions = 0
            self.root = None
            return

        self.dimensions = len(data[0])

        # Arguments are passed in (lvl, data) order to match the signature of
        # __create_next_lvl; the root sits at level 0.
        self.root = self.__create_next_lvl(0, data)

    def __create_next_lvl(self, lvl, data):
        """Recursively build the subtree for ``data`` rooted at depth ``lvl``.

        The points are sorted along one coordinate, the middle point becomes
        the node, and the points before / after it form the left / right
        subtrees one level deeper.

        Returns:
            The ``Node`` at the root of the subtree, or ``None`` when ``data``
            is empty.
        """
        if len(data) == 0: return None

        # Decides which feature should be used at the current level
        sortedData = sort_list_by_index(data, lvl % self.dimensions)

        lvlRootIndex = len(sortedData) // 2
        lvlRootNode = Node(sortedData[lvlRootIndex], lvl)

        # Each half is split again on the next coordinate until it is empty.
        lvlRootNode.left_side = self.__create_next_lvl(lvl + 1, sortedData[:lvlRootIndex])
        lvlRootNode.right_side = self.__create_next_lvl(lvl + 1, sortedData[lvlRootIndex + 1:])

        return lvlRootNode

### KNN

In [None]:
class KNN:
    """Brute-force k-nearest-neighbours classifier.

    Training only stores the data. Every prediction compares the query point
    against all stored training points with ``distance`` and takes the most
    frequent label among the ``k`` closest ones.
    """

    def fit(self, X_train, y_train):
        """Store the training points and their labels.

        Args:
            X_train: sequence of training points.
            y_train: sequence of label rows, aligned with ``X_train``; the
                label itself is read from position ``0`` of each row.
        """
        self.X_train = X_train
        self.y_train = y_train

    def predict(self, X_predict, k = 1):
        """Predict one label for every point in ``X_predict``.

        Args:
            X_predict: iterable of query points.
            k: number of nearest training points that vote; at least 1.

        Returns:
            A list with one predicted label per query point, in input order.

        Raises:
            ValueError: if ``k`` is smaller than 1.
        """
        if k < 1:
            raise ValueError("k must be at least 1")

        predictions = []

        for x_pred in X_predict:
            closest_points = self.__get_closest_points(x_pred, k)
            predictions.append(self.__get_prediction(closest_points))

        return predictions


    # Brute force

    def __get_closest_points(self, x_pred, k):
        """Return up to ``k`` ``(train_index, dist)`` pairs, nearest first."""
        closest_points = []

        for i, x_train in enumerate(self.X_train):
            dist = distance(x_pred, x_train)
            closest_points = self.__add_point_to_closest_points(i, dist, k, closest_points)

        return closest_points


    def __add_point_to_closest_points(self, point_index, dist, k, closest_points):
        """Insert a candidate into the sorted list of nearest points.

        ``closest_points`` is kept ordered by ascending distance and never
        holds more than ``k`` entries. A candidate that ties with an existing
        entry is placed after it. The list is modified in place and returned.
        """
        for i in range(min(k, len(closest_points))):
            if dist < closest_points[i][1]:
                closest_points.insert(i, (point_index, dist))
                break
        else:
            # Not closer than any kept entry: only keep it if there is room.
            if len(closest_points) < k:
                closest_points.append((point_index, dist))

        # An insert into a full list pushes the farthest entry past position
        # k; drop it so that only the k nearest points take part in the vote.
        del closest_points[max(k, 0):]
        return closest_points

    def __get_prediction(self, closest_points):
        """Return the most frequent label among the given nearest points."""
        # Each stored label row is indexed with [0] to obtain the label itself.
        classes = [self.y_train[index][0] for index, _ in closest_points]

        return most_frequent(classes)

### Pre-processing

In [None]:
from cProfile import label
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
from sklearn.preprocessing import StandardScaler
from warnings import filterwarnings
filterwarnings('ignore')

# Columns used as model inputs and the column holding the class label.
FEATURE_COLUMNS = ['Culmen Length (mm)', 'Culmen Depth (mm)', 'Flipper Length (mm)', 'Body Mass (g)']
TARGET_COLUMN = 'Species'

# Keep only the columns used below and drop every row with a missing value.
df = pd.read_csv("penguins.csv")
df = df[FEATURE_COLUMNS + [TARGET_COLUMN]]
df = df.dropna()

y = df[[TARGET_COLUMN]]

# Standardise the feature columns.
# NOTE(review): the scaler is fitted on every row, including rows that the
# later train_test_split call assigns to the test set. Fit it on the training
# split only if the evaluation must be free of that leakage.
scaler = StandardScaler().fit(df[FEATURE_COLUMNS].values)
features = scaler.transform(df[FEATURE_COLUMNS].values)

# Build X as a frame of its own (same columns and index as the selected rows)
# instead of assigning into a frame that was selected out of df.
X = pd.DataFrame(features, columns=FEATURE_COLUMNS, index=df.index)

### KNN Brute force

In [None]:
# Fixed seed so the split, and therefore every metric below, is reproducible.
RANDOM_STATE = 42

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=RANDOM_STATE
)

# The KNN class works on plain Python lists rather than DataFrames.
x_train_list = X_train.values.tolist()
y_train_list = y_train.values.tolist()
X_test_list = X_test.values.tolist()

knn = KNN()

knn.fit(x_train_list, y_train_list)
y_pred = knn.predict(X_test_list, k=3)

print(classification_report(y_test, y_pred))

# Use one label ordering for both the matrix rows/columns and the tick labels.
species_labels = df['Species'].unique()
cm = confusion_matrix(y_test, y_pred, labels=species_labels)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=species_labels)
disp.plot(xticks_rotation='vertical')
plt.show()