In [13]:
import pprint
from collections import defaultdict

import pandas as pd
from sklearn import datasets
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler

In [14]:
# Location and column layout of the raw Iris CSV. Kept for reference; the
# data actually used below comes from scikit-learn's loader.
csv_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
col_names = ["Sepal_Length", "Sepal_Width", "Petal_Length", "Petal_Width", "Species"]

# Load Iris via scikit-learn. A previous pd.read_csv(csv_url, names=col_names)
# call assigned to the same `iris` name and was overwritten on the very next
# line, so it only added a network round-trip (and a failure mode when
# offline) without affecting any result.
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples for testing; the fixed seed keeps the split
# identical across re-runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize the features. The scaler is fitted on the training split only
# and then applied unchanged to the test split, so no test-set statistics
# leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [15]:

# Build a 5-nearest-neighbours classifier and fit it on the standardized
# training split in one expression (fit() returns the fitted estimator).
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)

# Predict a class label for each held-out test sample.
y_pred = knn.predict(X_test)


In [16]:

# Fraction of test samples whose predicted label matches the true label.
accuracy = accuracy_score(y_test, y_pred)
# `accuracy` is a fraction in [0, 1]; the `%` format type multiplies by 100
# and appends the percent sign, so a perfect score prints as 100.00% rather
# than the misleading 1.00%.
print(f'Model Accuracy: {accuracy:.2%}')

# Collect a one-line description of every test sample, grouped by whether
# the prediction matched the true label.
results = {"correct": [], "incorrect": []}

# Samples are numbered from 1 in the report.
for i, (p, a) in enumerate(zip(y_pred, y_test), start=1):
    outcome = "correct" if p == a else "incorrect"
    results[outcome].append(f'Test sample {i}: Predicted={p}, Actual={a}')

pprint.pp(results)

Model Accuracy: 1.00%
{'correct': ['Test sample 1: Predicted=1, Actual=1',
             'Test sample 2: Predicted=0, Actual=0',
             'Test sample 3: Predicted=2, Actual=2',
             'Test sample 4: Predicted=1, Actual=1',
             'Test sample 5: Predicted=1, Actual=1',
             'Test sample 6: Predicted=0, Actual=0',
             'Test sample 7: Predicted=1, Actual=1',
             'Test sample 8: Predicted=2, Actual=2',
             'Test sample 9: Predicted=1, Actual=1',
             'Test sample 10: Predicted=1, Actual=1',
             'Test sample 11: Predicted=2, Actual=2',
             'Test sample 12: Predicted=0, Actual=0',
             'Test sample 13: Predicted=0, Actual=0',
             'Test sample 14: Predicted=0, Actual=0',
             'Test sample 15: Predicted=0, Actual=0',
             'Test sample 16: Predicted=1, Actual=1',
             'Test sample 17: Predicted=2, Actual=2',
             'Test sample 18: Predicted=1, Actual=1',
             'T