In [1]:
import numpy as np
import math
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

In [2]:
# Load the Iris dataset and pull out the feature matrix (X) and label vector (y).
iris = load_iris()
X, y = iris.data, iris.target

In [3]:
# Hold out 10% of the samples for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=42,
)

In [4]:
# Peek at the first ten training rows (all feature columns).
X_train[:10]

array([[6.3, 3.3, 4.7, 1.6],
       [6.5, 3. , 5.8, 2.2],
       [5.6, 2.5, 3.9, 1.1],
       [5.7, 2.8, 4.5, 1.3],
       [6.4, 2.8, 5.6, 2.2],
       [4.7, 3.2, 1.6, 0.2],
       [6.1, 3. , 4.9, 1.8],
       [5. , 3.4, 1.6, 0.4],
       [6.4, 2.8, 5.6, 2.1],
       [7.9, 3.8, 6.4, 2. ]])

In [5]:
# Labels of the first ten training samples.
y_train[:10]

array([1, 2, 1, 1, 2, 0, 2, 0, 2, 2])

In [6]:
def euclidean_distance(point1, point2):
    """Return the Euclidean (L2) distance between two points.

    Args:
        point1: Sequence of numeric coordinates.
        point2: Sequence of numeric coordinates with the same length as
            ``point1``.

    Returns:
        The distance as a float (``0.0`` for two empty points).

    Raises:
        ValueError: If the two points have a different number of coordinates.
    """
    # zip() stops at the shorter input, so a dimension mismatch would otherwise
    # be silently truncated into a plausible-looking but wrong distance.
    if len(point1) != len(point2):
        raise ValueError(
            f"Points must have the same dimension, got {len(point1)} and {len(point2)}"
        )

    # Named so that it does not shadow the built-in sum().
    squared_total = 0
    for a, b in zip(point1, point2):
        squared_total += (a - b) ** 2
    return math.sqrt(squared_total)

In [7]:
# Sanity check: the squared differences are 9 + 9 + 9, so this should equal sqrt(27).
euclidean_distance([1, 2, 3], [4, 5, 6])

5.196152422706632

In [8]:
def get_k_nearest_neighbors(X_train, y_train, test_point, k):
    """Find the k training points closest to ``test_point``.

    Args:
        X_train: Sequence of training points (one point per entry).
        y_train: Sequence of labels, one per entry of ``X_train``.
        test_point: The point whose neighbours are wanted.
        k: Maximum number of neighbours to return; must be non-negative.

    Returns:
        A list of ``(distance, label)`` tuples sorted by ascending distance,
        containing at most ``k`` entries (fewer if there are fewer training
        points than ``k``).

    Raises:
        ValueError: If ``k`` is negative, or if ``X_train`` and ``y_train``
            have different lengths.
    """
    # A negative k would make the final slice drop neighbours from the far end
    # instead of selecting the nearest ones, silently returning the wrong set.
    if k < 0:
        raise ValueError(f"k must be non-negative, got {k}")
    if len(X_train) != len(y_train):
        raise ValueError(
            f"X_train and y_train must have the same length, "
            f"got {len(X_train)} and {len(y_train)}"
        )

    distances = [
        (euclidean_distance(test_point, train_point), label)
        for train_point, label in zip(X_train, y_train)
    ]

    # Sort by distance only; list.sort is stable, so equidistant points keep
    # their training-set order.
    distances.sort(key=lambda pair: pair[0])
    return distances[:k]

In [9]:
def predict(neighbours):
    """Return the majority class label among the given neighbours.

    Args:
        neighbours: Iterable of ``(distance, label)`` pairs. Only the label
            of each pair is used for voting.

    Returns:
        The label with the most votes. When several labels tie, the one that
        appears first in ``neighbours`` wins.

    Raises:
        ValueError: If ``neighbours`` is empty, since there is nothing to
            vote on.
    """
    class_votes = {}
    for _, label in neighbours:
        class_votes[label] = class_votes.get(label, 0) + 1

    # Fail with an explicit message rather than max()'s generic
    # "arg is an empty sequence" error.
    if not class_votes:
        raise ValueError("Cannot predict a class from an empty set of neighbours")

    # max() returns the first key with the highest count, and dicts preserve
    # insertion order, so ties go to the label seen first.
    return max(class_votes, key=class_votes.get)

In [10]:
def KNN(X_train, y_train, X_test, k):
    """Classify each point in ``X_test`` by a vote among its k nearest training points.

    Returns a list with one predicted label per test point, in the same order
    as ``X_test``.
    """
    return [
        predict(get_k_nearest_neighbors(X_train, y_train, sample, k))
        for sample in X_test
    ]

In [11]:
k = 7
# Run the hand-written classifier and convert the labels to plain ints for printing.
predictions = [int(label) for label in KNN(X_train, y_train, X_test, k)]

print("Predictions:  ", predictions)
print("Actual values:", y_test.tolist())

# Accuracy is the fraction of test points whose predicted label matches the true label.
accuracy = np.mean(np.asarray(predictions) == y_test)
print(f"Accuracy: {accuracy:.2f}")

Predictions:   [1, 0, 2, 1, 1, 0, 1, 2, 2, 1, 2, 0, 0, 0, 0]
Actual values: [1, 0, 2, 1, 1, 0, 1, 2, 1, 1, 2, 0, 0, 0, 0]
Accuracy: 0.93


In [12]:
# Fit scikit-learn's classifier with the same k to cross-check the custom implementation.
sklearn_knn = KNeighborsClassifier(n_neighbors=k).fit(X_train, y_train)
sklearn_predictions = sklearn_knn.predict(X_test)
sklearn_accuracy = sklearn_knn.score(X_test, y_test)

# Show both sets of predictions and accuracies side by side.
print("Custom KNN Predictions:      ", predictions)
print("Scikit-learn KNN Predictions:", sklearn_predictions.tolist())
print(f"Custom KNN Accuracy:       {accuracy:.2f}")
print(f"Scikit-learn KNN Accuracy: {sklearn_accuracy:.2f}")

# Count the test points on which the two implementations disagree.
disagreements = sum(
    1
    for custom, reference in zip(predictions, sklearn_predictions)
    if custom != reference
)
print(f"Number of disagreements: {disagreements}")

Custom KNN Predictions:       [1, 0, 2, 1, 1, 0, 1, 2, 2, 1, 2, 0, 0, 0, 0]
Scikit-learn KNN Predictions: [1, 0, 2, 1, 1, 0, 1, 2, 2, 1, 2, 0, 0, 0, 0]
Custom KNN Accuracy:       0.93
Scikit-learn KNN Accuracy: 0.93
Number of disagreements: 0
