In [1]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report

In [2]:
# Load the breast-cancer dataset shipped with scikit-learn, then unpack the
# feature matrix (X) and the target labels (y) from the returned object.
cancer_data = load_breast_cancer()
X, y = cancer_data.data, cancer_data.target

In [3]:
# Hold out 20% of the samples for testing; the fixed random_state makes the
# split reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# K-Nearest Neighbors classifier created with the library's default settings.
# NOTE(review): no feature-scaling step is visible in this notebook; KNN is
# distance-based, so consider standardizing the features -- TODO confirm.
knn_model = KNeighborsClassifier()

In [4]:
# Hyperparameter candidates to search over: the number of neighbors, the
# neighbor-weighting scheme, and the Minkowski power parameter p.
parameters = {
    'n_neighbors': [3, 5, 7, 9],
    'weights': ['uniform', 'distance'],
    'p': [1, 2],
}

# Try every combination in the grid, scoring each one by accuracy under
# 5-fold cross-validation on the training split.
grid_cv = GridSearchCV(
    estimator=knn_model,
    param_grid=parameters,
    scoring='accuracy',
    cv=5,
)
grid_cv.fit(X_train, y_train)

In [6]:
# Read the winning hyperparameter combination from the fitted search once,
# then unpack the three tuned values into their own names.
best_params = grid_cv.best_params_
best_n_neighbors, best_weights, best_p = (
    best_params[key] for key in ('n_neighbors', 'weights', 'p')
)

# Report the selected hyperparameters, one per line
print(
    f'Best n_neighbors: {best_n_neighbors}',
    f'Best weights: {best_weights}',
    f'Best p: {best_p}',
    sep='\n',
)


Best n_neighbors: 7
Best weights: distance
Best p: 1


In [7]:
# Take the estimator selected by the grid search and predict labels for the
# held-out test split.
best_model = grid_cv.best_estimator_
y_pred = best_model.predict(X_test)

# Compare the predictions against the true test labels: overall accuracy
# plus the per-class precision / recall / f1 text report.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
report = classification_report(y_true=y_test, y_pred=y_pred)

print(
    f'Accuracy on test set: {accuracy:.2f}',
    f'Classification Report:\n{report}',
    sep='\n',
)


Accuracy on test set: 0.94
Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.86      0.91        43
           1       0.92      0.99      0.95        71

    accuracy                           0.94       114
   macro avg       0.95      0.92      0.93       114
weighted avg       0.94      0.94      0.94       114

