# K-Nearest Neighbors — From Scratch

KNN is a lazy learner that classifies by Euclidean distance. We'll visualize decision boundaries on Iris and tune k.

In [None]:
import sys, os
sys.path.insert(0, os.path.abspath(".."))

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler

from knn.knn import KNN

plt.style.use("seaborn-v0_8-darkgrid")

# Load and prepare data (2 features for visualization)
iris = load_iris()
X, y = iris.data[:, :2], iris.target   # use only sepal features for 2D plot

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the scaler on the training split only, then reuse its statistics for
# the test split so no information from the test data leaks into training.
scaler = StandardScaler()
X_train_s = scaler.fit_transform(X_train)
X_test_s  = scaler.transform(X_test)

# Tune k with shuffled K-fold cross-validation on the TRAINING split.
# Choosing k by test-set accuracy would make the reported test score
# optimistically biased, so the test split is scored exactly once, below.
n_folds = 5
rng = np.random.default_rng(42)   # fixed seed keeps the folds reproducible
folds = np.array_split(rng.permutation(len(X_train)), n_folds)

# The fold data does not depend on k, so scale each fold once up front.
# Each fold gets its own scaler, fitted only on that fold's fitting part.
cv_splits = []
for i, val_idx in enumerate(folds):
    fit_idx = np.concatenate(folds[:i] + folds[i + 1:])
    fold_scaler = StandardScaler().fit(X_train[fit_idx])
    cv_splits.append((
        fold_scaler.transform(X_train[fit_idx]), y_train[fit_idx],
        fold_scaler.transform(X_train[val_idx]), y_train[val_idx],
    ))

k_values = range(1, 21)
accuracies = []   # mean cross-validated accuracy, one entry per k
for k in k_values:
    fold_scores = []
    for X_fit, y_fit, X_val, y_val in cv_splits:
        model = KNN(k=k)
        model.fit(X_fit, y_fit)
        fold_scores.append(model.score(X_val, y_val))
    accuracies.append(float(np.mean(fold_scores)))

# np.argmax returns the first maximum, so ties resolve to the smallest k.
best_k = k_values[int(np.argmax(accuracies))]

# Refit on the whole training split and evaluate once on the untouched test split.
model = KNN(k=best_k)
model.fit(X_train_s, y_train)
test_accuracy = model.score(X_test_s, y_test)

print(
    f"Best k = {best_k}  |  CV accuracy = {max(accuracies):.4f}"
    f"  |  Test accuracy = {test_accuracy:.4f}"
)

In [None]:
# One figure, two panels: the k sweep on the left, decision regions on the right.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
ax_sweep, ax_boundary = axes

# --- Left panel: accuracy for each candidate k, with the selected k marked ---
ax_sweep.plot(list(k_values), accuracies, marker="o", color="steelblue", lw=2)
ax_sweep.axvline(best_k, color="crimson", linestyle="--", label=f"Best k={best_k}")
ax_sweep.set(title="Accuracy vs k", xlabel="k", ylabel="Accuracy")
ax_sweep.legend()

# --- Right panel: decision regions of a model refit at the selected k ---
knn = KNN(k=best_k)
knn.fit(X_train_s, y_train)

# Build a dense grid covering the scaled training data plus a margin of 1
# on every side; h is the grid spacing along both axes.
h = 0.05
first_feature = X_train_s[:, 0]
second_feature = X_train_s[:, 1]
x_min, x_max = first_feature.min() - 1, first_feature.max() + 1
y_min, y_max = second_feature.min() - 1, second_feature.max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

# Predict one label per grid point, then fold the result back into grid shape
# so it can be drawn as filled contours.
grid_points = np.column_stack((xx.ravel(), yy.ravel()))
Z = knn.predict(grid_points).reshape(xx.shape)

cmap_bg = ListedColormap(["#FFAAAA", "#AAFFAA", "#AAAAFF"])   # pale region fill
cmap_pt = ListedColormap(["red", "green", "blue"])            # training points
ax_boundary.contourf(xx, yy, Z, alpha=0.4, cmap=cmap_bg)
ax_boundary.scatter(
    first_feature, second_feature,
    c=y_train, cmap=cmap_pt, s=20, edgecolors="k",
)
ax_boundary.set(
    title=f"Decision Boundary (k={best_k})",
    xlabel="Sepal length (scaled)",
    ylabel="Sepal width (scaled)",
)

plt.tight_layout()
plt.show()