In [220]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import h5py
from kNN import CustomKNeighborsClassifier, CustomSortedKNeighborsClassifier

import time

In [211]:
# Load the train and test splits from HDF5. Each file is read through the
# keys "X" and "y"; the [:] slices are taken inside the `with` block, i.e.
# before the file handle is closed.
with h5py.File("../data/train_data.h5", "r") as f:
    X_train, y_train = f["X"][:], f["y"][:]

with h5py.File("../data/test_data.h5", "r") as f:
    X_test, y_test = f["X"][:], f["y"][:]

In [212]:
# Build the three classifiers with one shared neighbour count, then fit each
# of them on the training split (scikit-learn first, then the two custom ones).
n_neighbours = 3

model_sk = KNeighborsClassifier(n_neighbors=n_neighbours)
model_custom = CustomKNeighborsClassifier(n_neighbors=n_neighbours)
model_custom_sort = CustomSortedKNeighborsClassifier(n_neighbors=n_neighbours)

# fit() is called as a plain statement: its return value is not relied upon.
for model in (model_sk, model_custom, model_custom_sort):
    model.fit(X_train, y_train)

In [213]:
%%timeit # 548 μs
# Times scikit-learn's predict() on the test set.
# NOTE: names assigned inside a %%timeit cell are not kept in the notebook
# namespace (IPython behaviour), so y_pred_sk is not usable after this cell;
# the predictions are computed again in a later cell before being scored.
y_pred_sk = model_sk.predict(X_test)

560 μs ± 9.15 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [214]:
%%timeit # 635 μs 
# Times the custom classifier's predict() on the test set.
# NOTE: names assigned inside a %%timeit cell are not kept in the notebook
# namespace (IPython behaviour), so y_pred_custom is not usable after this
# cell; the predictions are computed again in a later cell before being scored.
y_pred_custom = model_custom.predict(X_test)

633 μs ± 568 ns per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [215]:
%%timeit # 632 μs
# Times the sorted custom classifier's predict() on the test set.
# NOTE: names assigned inside a %%timeit cell are not kept in the notebook
# namespace (IPython behaviour), so y_pred_custom_sort is not usable after this
# cell; the predictions are computed again in a later cell before being scored.
y_pred_custom_sort = model_custom_sort.predict(X_test)    

637 μs ± 4.85 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [217]:
# Compute the predictions of all three fitted models in a regular cell so the
# results are bound in the notebook namespace, then score each of them
# against the test labels.
y_pred_sk = model_sk.predict(X_test)
y_pred_custom = model_custom.predict(X_test)
y_pred_custom_sort = model_custom_sort.predict(X_test)

# accuracy_score is documented as accuracy_score(y_true, y_pred): the ground
# truth goes first, the predictions second.
accuracy_sk = accuracy_score(y_test, y_pred_sk)
print(f"Accuracy (sk): {accuracy_sk:%}")

accuracy_custom = accuracy_score(y_test, y_pred_custom)
# Print the custom model's own score here, not the scikit-learn one.
print(f"Accuracy (custom): {accuracy_custom:%}")

accuracy_custom_sort = accuracy_score(y_test, y_pred_custom_sort)
print(f"Accuracy (custom sorted): {accuracy_custom_sort:%}")

Accuracy (sk): 80.612245%
Accuracy (custom): 80.612245%
Accuracy (custom sorted): 79.591837%


In [231]:
def train_and_fit_sk(n_neighbours):
    """Fit scikit-learn's KNeighborsClassifier for one neighbour count and predict the test set.

    Parameters
    ----------
    n_neighbours : int
        Forwarded to the classifier as ``n_neighbors``.

    Returns
    -------
    Whatever ``model.predict(X_test)`` returns for the freshly fitted model.

    Notes
    -----
    Reads the notebook-level ``X_train``, ``y_train`` and ``X_test``.
    """
    # Use the function's own argument; the notebook-level name `k` is not
    # guaranteed to exist (or to change) between calls.
    model = KNeighborsClassifier(n_neighbors=n_neighbours)
    model.fit(X_train, y_train)
    return model.predict(X_test)

def train_and_fit_custom(n_neighbours):
    """Fit CustomKNeighborsClassifier for one neighbour count and predict the test set.

    Parameters
    ----------
    n_neighbours : int
        Forwarded to the classifier as ``n_neighbors``.

    Returns
    -------
    Whatever ``model.predict(X_test)`` returns for the freshly fitted model.

    Notes
    -----
    Reads the notebook-level ``X_train``, ``y_train`` and ``X_test``.
    """
    # Use the function's own argument; the notebook-level name `k` is not
    # guaranteed to exist (or to change) between calls.
    model = CustomKNeighborsClassifier(n_neighbors=n_neighbours)
    model.fit(X_train, y_train)
    return model.predict(X_test)

def train_and_fit_custom_sort(n_neighbours):
    """Fit CustomSortedKNeighborsClassifier for one neighbour count and predict the test set.

    Parameters
    ----------
    n_neighbours : int
        Forwarded to the classifier as ``n_neighbors``.

    Returns
    -------
    Whatever ``model.predict(X_test)`` returns for the freshly fitted model.

    Notes
    -----
    Reads the notebook-level ``X_train``, ``y_train`` and ``X_test``.
    """
    # Use the function's own argument; the notebook-level name `k` is not
    # guaranteed to exist (or to change) between calls.
    model = CustomSortedKNeighborsClassifier(n_neighbors=n_neighbours)
    model.fit(X_train, y_train)
    return model.predict(X_test)

K = 10
label_width = 25


def _timed_sweep(label, fit_predict):
    """Time one fit-and-predict sweep over the neighbour counts in range(1, K).

    The timed region covers every call to `fit_predict` plus stacking the
    results with np.array. The elapsed wall-clock time is printed with
    `label` left-aligned to `label_width` columns, and the stacked
    predictions are returned.
    """
    start = time.perf_counter()
    stacked = np.array([fit_predict(count) for count in range(1, K)])
    end = time.perf_counter()
    print(f"{label:<{label_width}} {end - start:.6f} seconds")
    return stacked


# Same order as before: scikit-learn, custom, custom sorted.
y_pred_multi_sk = _timed_sweep("Elapsed (sk):", train_and_fit_sk)
y_pred_multi_custom = _timed_sweep("Elapsed (Custom):", train_and_fit_custom)
y_pred_multi_custom_sort = _timed_sweep("Elapsed (Custom Sort):", train_and_fit_custom_sort)

Elapsed (sk):             0.009398 seconds
Elapsed (Custom):         0.005859 seconds
Elapsed (Custom Sort):    0.005897 seconds
