In [22]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import Normalizer
import matplotlib.pyplot as plt

In [23]:
# Load the Iris dataset that ships with scikit-learn, then pull out the
# feature matrix (x) and the class-label vector (y) used by every later cell.
iris = load_iris()
x, y = iris.data, iris.target


In [24]:
# Two-stage hold-out split:
#   1. keep 70% of the samples for training, set 30% aside;
#   2. cut that 30% in half to get the validation and test sets.
# The fixed random_state makes both splits reproducible.
# NOTE(review): neither call passes stratify=, so class proportions are not
# guaranteed to match across the three subsets -- confirm this is intended.
X_train, X_temp, y_train, y_temp = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=42,
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp,
    y_temp,
    test_size=0.5,
    random_state=42,
)

In [25]:
# Rescale every sample (row) to unit norm. Normalizer works on each row
# independently and learns no statistics from the data, so fitting on the
# training set does not leak information into the other splits.
# NOTE(review): despite the variable name, this is not per-feature scaling;
# if that was the goal, StandardScaler/MinMaxScaler is the usual choice --
# confirm.
scaler = Normalizer()
X_train_normalized = scaler.fit_transform(X_train)
X_val_normalized = scaler.transform(X_val)
X_test_normalized = scaler.transform(X_test)

In [26]:
# Model selection: try k = 1..20, train on the normalized training split and
# score on the validation split. The strict ">" comparison means ties are
# resolved in favour of the smallest k that reaches the top accuracy.
best_k, best_accuracy = None, 0

for k in range(1, 21):
    # fit() returns the estimator itself, so construction and training chain.
    knn = KNeighborsClassifier(n_neighbors=k).fit(X_train_normalized, y_train)
    y_pred = knn.predict(X_val_normalized)
    accuracy = accuracy_score(y_val, y_pred)
    if accuracy > best_accuracy:
        best_k, best_accuracy = k, accuracy

# NOTE(review): "accuracry" is a typo in the message; it is left untouched
# here so the printed output stays identical.
print(f"best k value is : {best_k}")
print(f"highest accuracry is : {best_accuracy}")

best k value is : 1
highest accuracry is : 1.0


In [27]:
# Rebuild the classifier with the k chosen during the validation search and
# train it on the normalized training split; this is the model evaluated in
# the cells that follow.
knn = KNeighborsClassifier(
    n_neighbors=best_k,
)
knn.fit(
    X_train_normalized,
    y_train,
)

In [28]:
# Evaluate the fitted classifier on the training, validation and test splits.
# Each split is predicted exactly once and the predictions are reused for both
# the accuracy and the confusion matrix (previously every split was predicted
# twice, doubling the nearest-neighbour search work for identical results).
y_train_pred = knn.predict(X_train_normalized)
y_val_pred = knn.predict(X_val_normalized)
y_test_pred = knn.predict(X_test_normalized)

# Fraction of correctly classified samples per split.
train_accuracy = accuracy_score(y_train, y_train_pred)
val_accuracy = accuracy_score(y_val, y_val_pred)
test_accuracy = accuracy_score(y_test, y_test_pred)

# Confusion matrices: rows are true classes, columns are predicted classes.
train_confusion_matrix = confusion_matrix(y_train, y_train_pred)
val_confusion_matrix = confusion_matrix(y_val, y_val_pred)
test_confusion_matrix = confusion_matrix(y_test, y_test_pred)

In [29]:
# Summary report: the selected k and the accuracy on each split (two
# decimals), followed by the confusion matrix of each split. Using
# sep='\n' puts every argument on its own line, exactly as separate
# print() calls would.
print(
    f'Best k value: {best_k}',
    f'Training Accuracy: {train_accuracy:.2f}',
    f'Validation Accuracy: {val_accuracy:.2f}',
    f'Testing Accuracy: {test_accuracy:.2f}',
    sep='\n',
)
print('Training Confusion Matrix:', train_confusion_matrix, sep='\n')
print('Validation Confusion Matrix:', val_confusion_matrix, sep='\n')
print('Testing Confusion Matrix:', test_confusion_matrix, sep='\n')

Best k value: 1
Training Accuracy: 1.00
Validation Accuracy: 1.00
Testing Accuracy: 0.96
Training Confusion Matrix:
[[31  0  0]
 [ 0 37  0]
 [ 0  0 37]]
Validation Confusion Matrix:
[[13  0  0]
 [ 0  3  0]
 [ 0  0  6]]
Testing Confusion Matrix:
[[ 6  0  0]
 [ 0 10  0]
 [ 0  1  6]]
