# K-Nearest Neighbors (KNN) – Step-by-Step
Train a **KNN classifier** to predict **Pass / Fail** using:
- Hours studied
- Practice tests taken

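Key idea: KNN is **distance-based**, so a feature with a larger numeric range would dominate the distance; we use **feature scaling** to put all features on a comparable scale.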

In [None]:
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

## 1) Create a small dataset

In [None]:
# Toy dataset: one row per student as [hours studied, practice tests taken].
X = np.array(
    [
        # the four students labelled 0 in y
        [1, 0], [2, 0], [2, 1], [3, 1],
        # the six students labelled 1 in y
        [3, 2], [4, 2], [5, 2], [6, 3], [7, 3], [8, 4],
    ]
)
# Labels aligned row-for-row with X: 0=Fail, 1=Pass
y = np.array([0] * 4 + [1] * 6)

# Quick sanity check of the array shapes and the first few samples.
print('X shape:', X.shape)
print('y shape:', y.shape)
print('First 5 rows of X:\n', X[0:5])
print('First 5 labels:', y[0:5])

## 2) Train/Test split

In [None]:
# Hold out 30% of the samples for testing. Stratifying on y asks the split to
# keep the class proportions similar in both parts; the fixed random_state
# makes the split reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
    test_size=0.3,
)
print('Train size:', X_train.shape[0])
print('Test size:', X_test.shape[0])

## 3) Scale features (important for KNN)

In [None]:
# Learn the scaling parameters from the training split only, then apply that
# same fitted transformation to both splits, so nothing is fitted on the
# test data.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Show one sample before and after scaling to make the effect visible.
print('Before scaling (first train row):', X_train[0])
print('After scaling  (first train row):', X_train_scaled[0])

## 4) Pick K and train the model

In [None]:
k = 3  # number of neighbours consulted for each prediction
# fit() returns the estimator itself, so construction and training can be chained.
knn = KNeighborsClassifier(n_neighbors=k).fit(X_train_scaled, y_train)
print(f'KNN trained with k={k}!')

## 5) Predict and evaluate

In [None]:
# Classify the held-out samples (scaled with the scaler fitted on the training split).
y_pred = knn.predict(X_test_scaled)

# Side-by-side view of predicted and true labels.
print('Predictions:', y_pred)
print('Actual:     ', y_test)

# Each metric takes (true labels, predicted labels); print them in turn.
for heading, metric in (
    ('\nAccuracy:', accuracy_score),
    ('\nConfusion Matrix:\n', confusion_matrix),
    ('\nClassification Report:\n', classification_report),
):
    print(heading, metric(y_test, y_pred))

## 6) Try your own input (new student)

In [None]:
# A single unseen sample, kept 2-D (one row, two features) because the
# scaler and the classifier both operate on 2-D input here.
new_student = np.array([[4, 1]])
# Reuse the already-fitted scaler so the sample is transformed like the training data.
new_student_scaled = scaler.transform(new_student)

# Both calls return one entry per input row; unpack the single row.
(pred_class,) = knn.predict(new_student_scaled)
(pred_prob,) = knn.predict_proba(new_student_scaled)

print('New student [hours, practice_tests]:', new_student[0])
print('Predicted class (0=Fail, 1=Pass):', pred_class)
print('Probabilities [P(Fail), P(Pass)]:', pred_prob)

## 7) Optional: visualize the dataset (unscaled)

In [None]:
# Scatter plot of the raw (unscaled) samples, one marker style per class.
plt.figure()
class_styles = {0: ('x', 'Fail'), 1: ('o', 'Pass')}
for label, (marker, name) in class_styles.items():
    in_class = y == label  # boolean mask selecting this class's rows
    plt.scatter(X[in_class, 0], X[in_class, 1], marker=marker, label=name)
plt.xlabel('Hours studied')
plt.ylabel('Practice tests taken')
plt.title('KNN Dataset (Unscaled)')
plt.legend()
plt.show()

## 8) Optional: try different K values

In [None]:
# Compare test accuracy across several neighbourhood sizes.
# The loop variable is deliberately not named `k`: reusing that name would
# overwrite the `k` chosen in step 4, leaving it out of sync with the
# already-trained `knn` model after this cell runs.
n_train = len(X_train_scaled)
for k_value in [1, 3, 5, 7]:
    # KNeighborsClassifier cannot query more neighbours than there are
    # training samples, so skip candidates that are too large for this split
    # instead of letting predict() raise.
    if k_value > n_train:
        print(f'k={k_value} -> skipped (only {n_train} training samples)')
        continue
    m = KNeighborsClassifier(n_neighbors=k_value)
    m.fit(X_train_scaled, y_train)
    pred = m.predict(X_test_scaled)
    acc = accuracy_score(y_test, pred)
    print(f'k={k_value} -> accuracy={acc:.3f}')