### The best model for predicting iris species from the iris dataset according to its accuracy

# imports

In [38]:
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV
from matplotlib.colors import ListedColormap
import numpy as np

# Loading The Dataset and splitting it into training and testing sets

In [39]:
# Load dataset
data = load_iris()
X = data.data[:, 2:]  # Use only the last two features for visualization
real_X = data.data
y = data.target

# Split the dataset into training and testing sets
# We use real_X to decide the best model
real_X_train, real_X_test, y_train, y_test = train_test_split(real_X, y, test_size=0.2, random_state=42)
X_train  = real_X_train[:, 2:]
X_test = real_X_test[:, 2:]

In [40]:
# different metrics to use for KNN
metrics = ['euclidean', 'manhattan', 'minkowski', 'chebyshev', 'cosine']

# Test different values of k
k_values = range(1, 21)

# Store the different models for each metric
metrics_models = []

# Tuning Hyper Parameters

In [41]:
max_accuracy_all = 0
best_model_all = None

for metric in metrics:
    if metric == 'minkowski':
        p = 3
    else :
        # default value for p
        p = 2

    max_accuracy = 0
    best_model = None

    for k in k_values:
        knn = KNeighborsClassifier(n_neighbors=k , metric=metric, p=p)
        knn.fit(X_train, y_train)
        y_pred = knn.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        if ( accuracy > max_accuracy):
            max_accuracy = accuracy
            best_model = knn
        print(f'k={k} metric={metric} p={p} accuracy={accuracy}')

    if (max_accuracy > max_accuracy_all):
        max_accuracy_all = max_accuracy
        best_model_all = best_model

k=1 metric=euclidean p=2 accuracy=1.0
k=2 metric=euclidean p=2 accuracy=0.9666666666666667
k=3 metric=euclidean p=2 accuracy=1.0
k=4 metric=euclidean p=2 accuracy=1.0
k=5 metric=euclidean p=2 accuracy=1.0
k=6 metric=euclidean p=2 accuracy=1.0
k=7 metric=euclidean p=2 accuracy=1.0
k=8 metric=euclidean p=2 accuracy=1.0
k=9 metric=euclidean p=2 accuracy=1.0
k=10 metric=euclidean p=2 accuracy=1.0
k=11 metric=euclidean p=2 accuracy=1.0
k=12 metric=euclidean p=2 accuracy=1.0
k=13 metric=euclidean p=2 accuracy=1.0
k=14 metric=euclidean p=2 accuracy=1.0
k=15 metric=euclidean p=2 accuracy=1.0
k=16 metric=euclidean p=2 accuracy=1.0
k=17 metric=euclidean p=2 accuracy=1.0
k=18 metric=euclidean p=2 accuracy=1.0
k=19 metric=euclidean p=2 accuracy=1.0
k=20 metric=euclidean p=2 accuracy=1.0
k=1 metric=manhattan p=2 accuracy=1.0
k=2 metric=manhattan p=2 accuracy=0.9666666666666667
k=3 metric=manhattan p=2 accuracy=1.0
k=4 metric=manhattan p=2 accuracy=1.0
k=5 metric=manhattan p=2 accuracy=1.0
k=6 metri

In [42]:
print(f'Best model: {best_model_all} with accuracy: {max_accuracy_all}')

Best model: KNeighborsClassifier(metric='euclidean', n_neighbors=1) with accuracy: 1.0
