In [4]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [5]:
# Load the iris dataset and pull out the feature array and the label array
data = load_iris()
X, y = data.data, data.target

In [6]:
# Hold out 30% of the samples for testing; the fixed random_state keeps the
# split identical from run to run
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [7]:
# Base k-nearest-neighbours classifier, constructed with no arguments (library
# defaults). It is passed as the `estimator` to GridSearchCV further down,
# which tries the n_neighbors / weights / metric values listed in param_grid.
knn = KNeighborsClassifier()

In [8]:
# Hyperparameter values the grid search will combine: 4 x 2 x 2 = 16 candidates
param_grid = dict(
    n_neighbors=[3, 5, 7, 9],          # number of neighbours that vote
    weights=['uniform', 'distance'],   # how each neighbour's vote is weighted
    metric=['euclidean', 'manhattan'], # distance function between samples
)

In [10]:
# Exhaustive search over every combination in param_grid. Each candidate is
# scored with 5-fold cross-validation on the training split, using the
# macro-averaged F1 score (scoring='f1_macro') as the selection criterion.
grid_search = GridSearchCV(
    estimator=knn,
    param_grid=param_grid,
    cv=5,
    scoring='f1_macro',
    verbose=2,   # print progress for the fits
    n_jobs=-1,   # use all available CPU cores
)

# Fit GridSearchCV to the training data only; the test split stays untouched
grid_search.fit(X_train, y_train)

# Report the winning configuration. best_score_ is the mean cross-validated
# score under the `scoring` metric chosen above, so it is a macro F1 value and
# must be labelled as such (it was previously mislabelled as accuracy).
print(f"Best parameters: {grid_search.best_params_}")
print(f"Best cross-validation macro F1: {grid_search.best_score_}")

Fitting 5 folds for each of 16 candidates, totalling 80 fits
Best parameters: {'metric': 'euclidean', 'n_neighbors': 7, 'weights': 'uniform'}
Best cross-validation accuracy: 0.953970169264287


In [11]:
# Take the best estimator found by the grid search ...
best_knn = grid_search.best_estimator_
# ... and predict labels for the held-out test samples
y_pred = best_knn.predict(X=X_test)

In [13]:
# Fraction of test samples whose predicted label matches the true label
test_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Test accuracy: {test_accuracy}")

Test accuracy: 1.0
