In [6]:
from collections import Counter
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
import pandas as pd

import numpy as np

In [7]:
def euclidean_distance(x1, x2):
    return np.sqrt(np.sum((x1 - x2) ** 2))

In [8]:
def knn_predict(X_train, y_train, X_test, k=3):
    predictions = []
    
    for x in X_test:
        # Step 1: Compute distances to all training points
        distances = [euclidean_distance(x, x_train) for x_train in X_train]
        
        # Step 2: Get indices of k nearest neighbors
        k_indices = np.argsort(distances)[:k]
        
        # Step 3: Get the labels of the k nearest neighbors
        k_nearest_labels = [y_train[i] for i in k_indices]
        
        # Step 4: Determine the most common label
        most_common = Counter(k_nearest_labels).most_common(1)[0][0]
        
        # Step 5: Append prediction
        predictions.append(most_common)
    
    return predictions

In [9]:
while True:
    try:
        k = int(input("Enter the value of k (must be an integer > 0): "))
        if k > 0 and k < 9:
            break
        else:
            print("Please enter a number between 0 and 4")
    except ValueError:
        print("Invalid input. Please enter a valid integer.")


In [10]:
iris = load_iris()
X, y = iris.data, iris.target

# --------- Split dataset (80% train, 20% test) ---------
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

# --------- Train and Predict ---------
knn = KNN(k=k)
knn.fit(X_train, y_train)
predictions = knn.predict(X_test)

# --------- Evaluate ---------
correct, wrong = [], []

for i in range(len(y_test)):
    if predictions[i] == y_test[i]:
        correct.append((X_test[i], predictions[i]))
    else:
        wrong.append((X_test[i], predictions[i], y_test[i]))

In [11]:

print("\nFirst 3 Predictions:")
for i in range(3):
    print(f"Sample {i}: Predicted = {predictions[i]}, Actual = {y_test[i]}")


report = classification_report(y_test, predictions, target_names=iris.target_names, output_dict=True)
report_df = pd.DataFrame(report).transpose()


report_df[['precision', 'recall', 'f1-score']] = report_df[['precision', 'recall', 'f1-score']].round(2)
report_df['support'] = report_df['support'].astype(int)

print("\nClassification Report:")
print(report_df[['precision', 'recall', 'f1-score', 'support']])


First 3 Predictions:
Sample 0: Predicted = 2, Actual = 1
Sample 1: Predicted = 2, Actual = 2
Sample 2: Predicted = 2, Actual = 2

Classification Report:
              precision  recall  f1-score  support
setosa             1.00    1.00      1.00       13
versicolor         1.00    0.83      0.91        6
virginica          0.92    1.00      0.96       11
accuracy           0.97    0.97      0.97        0
macro avg          0.97    0.94      0.96       30
weighted avg       0.97    0.97      0.97       30
