# Tie-Breaking Explanation
Let's build a tiny synthetic dataset where the manual KNN and scikit-learn's KNeighborsClassifier make different decisions.

In [None]:
import numpy as np
from sklearn.neighbors import KNeighborsClassifier

# Toy training set: four samples, each with two integer-coded categorical features.
# Both "A" rows have first feature 0; both "B" rows have first feature 1.
y_train = np.array(["B", "A", "B", "A"])
X_train = np.array([[1, 0], [0, 0], [1, 1], [0, 1]])

# Query point: identical to the second training row (class A).
x_test = np.array([0, 0])

# Manual Hamming distance
def hamming_distance(x1, x2):
    """Return how many positions of ``x1`` and ``x2`` hold different values.

    The result is the raw mismatch count, not a fraction of the vector length.
    """
    mismatches = x1 != x2
    return np.sum(mismatches)

def knn_manual(X_train, y_train, x_test, k=2):
    """Classify ``x_test`` by majority vote among its ``k`` nearest training rows.

    Distances come from ``hamming_distance``. Candidates are ranked with a
    stable sort on distance only, so rows at the same distance keep their
    training-set order.

    Returns the label with the most votes among the ``k`` nearest rows. When
    several labels share the top count, the winner is whichever of them is
    reached first while iterating over the set of candidate labels.
    """
    ranked = sorted(
        (
            (hamming_distance(x_test, X_train[idx]), y_train[idx])
            for idx in range(len(y_train))
        ),
        key=lambda pair: pair[0],
    )
    votes = [label for _, label in ranked[:k]]
    # Tie-breaking is delegated to set iteration order on purpose: this is the
    # behaviour the surrounding notebook text discusses.
    return max(set(votes), key=votes.count)

# Majority vote from the hand-written implementation.
print("Manual KNN:", knn_manual(X_train, y_train, x_test, k=2))

# Same query through scikit-learn, reusing the Python-level distance function
# so both classifiers rank neighbours with the same metric.
clf = KNeighborsClassifier(
    weights="uniform",
    algorithm="brute",
    metric=hamming_distance,
    n_neighbors=2,
).fit(X_train, y_train)
print("sklearn KNN:", clf.predict([x_test])[0])

Manual KNN: B
sklearn KNN: A


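- Manual KNN: `max(set(top_k), key=top_k.count)` breaks ties by returning the first tied label it meets while iterating over the set. Set iteration order for strings is not guaranteed (string hash randomization can change it between interpreter runs), so the tied result shown above may not be reproducible.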
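- scikit-learn KNN: uses its own deterministic tie-breaking for votes (in practice the tied class that comes first in the sorted class labels, here `A`). When several training points are equidistant from the query, which of them are selected as neighbors depends on the order of the training data.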