# Description

In [230]:
import numpy as np
from sklearn import metrics
from sklearn.datasets import load_iris
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import GridSearchCV, cross_val_predict
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler

In [231]:
# Fetch the Iris dataset and split it into the feature matrix (X) and
# the class labels (y).
iris = load_iris()
X, y = iris.data, iris.target

# Preprocessing

In [232]:
# Downcast to compact dtypes: features become float16, labels become int8.
# NOTE(review): float16 keeps only about three significant decimal digits;
# confirm that this precision loss is acceptable for the features.
X, y = X.astype(np.float16), y.astype(np.int8)

In [233]:
# Rescale every feature to zero mean and unit variance. The fitted scaler is
# kept in `scaler` so the same transformation can be reapplied later.
# NOTE(review): the scaler is fitted on all of X before any cross-validation
# split is made, so held-out folds contribute to the scaling statistics;
# consider moving the scaler into a Pipeline that is fitted per fold.
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [234]:
# Base k-NN classifier built with default settings; the grid search below
# varies its `n_neighbors` parameter.
knn = KNeighborsClassifier()

In [235]:
# Candidate neighbourhood sizes to try: k = 1, 2, ..., 9.
param_grid = {"n_neighbors": np.arange(1, 10)}

# Exhaustive search over `param_grid`; scoring and the cross-validation
# scheme are left at the library defaults.
grid = GridSearchCV(estimator=knn, param_grid=param_grid)

In [236]:
# Run the grid search: evaluate every candidate `n_neighbors` on (X, y).
grid.fit(X, y)

# Evaluation

In [237]:
# Hyperparameter setting that scored best during the grid search.
grid.best_params_

{'n_neighbors': 6}

In [238]:
# Cross-validated score obtained with the best hyperparameter setting.
grid.best_score_

0.9666666666666668

The `best_params_` attribute of `grid` shows that the best-performing value of `k` for the k-NN algorithm is six (6). With this hyperparameter, the model achieves a cross-validated accuracy score of around 97%.

In [239]:
# Out-of-fold predictions for every sample, followed by the confusion matrix
# (rows = actual class, columns = predicted class).
# NOTE(review): `knn` is the untuned estimator built with default settings,
# not the `n_neighbors` selected by `grid`; pass `grid.best_estimator_`
# instead if the tuned model is the one meant to be evaluated - confirm.
y_pred = cross_val_predict(knn, X, y)

# The function is called through the `metrics` module because the result is
# stored under the name `confusion_matrix`. Calling the bare imported name
# would rebind it to an array on the first run, and re-running this cell
# would then fail with "'numpy.ndarray' object is not callable".
confusion_matrix = metrics.confusion_matrix(y, y_pred)

In [240]:
# Display the confusion matrix array computed in the previous cell.
confusion_matrix

array([[50,  0,  0],
       [ 0, 47,  3],
       [ 0,  3, 47]])

The `confusion_matrix` variable above can be translated to the table below.

|          | Predicted Setosa | Predicted Versicolor | Predicted Virginica |
|----------|------------------|-----------------------|-----------------------|
| Actual Setosa | 50 | 0 | 0 |
| Actual Versicolor | 0 | 47 | 3 |
| Actual Virginica | 0 | 3 | 47 |