In [33]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

import warnings
# NOTE(review): this suppresses every warning category for the rest of the
# session, not just a known noisy one; consider narrowing it with the
# category= / module= arguments once the offending warning is identified.
warnings.filterwarnings('ignore')

In [34]:
from sklearn.datasets import load_breast_cancer

# Load the bundled breast-cancer dataset and assemble it into one frame:
# a column per feature, followed by a trailing 'target' column of labels.
breast_cancer = load_breast_cancer()
df = pd.DataFrame(
    breast_cancer.data,
    columns=breast_cancer.feature_names,
).assign(target=breast_cancer.target)

In [35]:
# Peek at four random rows. The seed is fixed so the preview shown here is
# the same after every Restart & Run All instead of changing on each run.
df.sample(4, random_state=42)

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,target
121,18.66,17.12,121.4,1077.0,0.1054,0.11,0.1457,0.08665,0.1966,0.06213,...,24.9,145.4,1549.0,0.1503,0.2291,0.3272,0.1674,0.2894,0.08456,0
9,12.46,24.04,83.97,475.9,0.1186,0.2396,0.2273,0.08543,0.203,0.08243,...,40.68,97.65,711.4,0.1853,1.058,1.105,0.221,0.4366,0.2075,0
57,14.71,21.59,95.55,656.9,0.1137,0.1365,0.1293,0.08123,0.2027,0.06758,...,30.7,115.7,985.5,0.1368,0.429,0.3587,0.1834,0.3698,0.1094,0
335,17.06,21.0,111.8,918.6,0.1119,0.1056,0.1508,0.09934,0.1727,0.06071,...,33.15,143.2,1362.0,0.1449,0.2053,0.392,0.1827,0.2623,0.07599,0


In [36]:
# Separate the label vector from the feature matrix (everything but 'target').
y = df['target']
X = df.drop('target', axis=1)

In [37]:
# Hold out 25% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

In [38]:
# Learn the scaling statistics from the training rows only, then apply that
# same fitted scaler to both splits so the test rows never influence the fit.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [39]:
# Baseline model: k-nearest neighbours with the library default k (5),
# spelled out here so the starting point of the later search is visible.
classifier = KNeighborsClassifier(n_neighbors=5)
classifier.fit(X_train, y_train)

In [40]:
# Predict labels for the held-out rows with the fitted baseline classifier.
# Note: `y_pred` is reassigned further down by the grid-search cell, so the
# evaluation cell right after this one must run before that reassignment.
y_pred = classifier.predict(X_test)

In [41]:
# Overall accuracy on the test split. The format spec is '.2f' with no
# leading space: a space there is the sign flag and would pad positive
# values with an extra blank ("Accuracy:  0.96").
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

# Counts of true vs. predicted classes (rows: true label, columns: predicted).
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix: ")
print(conf_matrix)

# Per-class precision / recall / F1 plus the averaged summaries.
class_report = classification_report(y_test, y_pred)
print('Classification Report: ')
print(class_report)

Accuracy:  0.96
Confusion Matrix: 
[[51  3]
 [ 3 86]]
Classification Report: 
              precision    recall  f1-score   support

           0       0.94      0.94      0.94        54
           1       0.97      0.97      0.97        89

    accuracy                           0.96       143
   macro avg       0.96      0.96      0.96       143
weighted avg       0.96      0.96      0.96       143



In [42]:
# Candidate neighbourhood sizes to try: k = 1 .. 20.
n_neighbors = [*range(1, 21)]
param_grid = dict(n_neighbors=n_neighbors)

# Exhaustive search over k, scored by accuracy with 5-fold cross-validation
# on the training split.
grid_search = GridSearchCV(
    estimator=classifier,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
)
grid_search.fit(X_train, y_train)

# Pull out the winning setting and the estimator refitted with it.
best_params = grid_search.best_params_
best_estimator = grid_search.best_estimator_

print("Best Hyperparameter for Classification: ")
print(best_params)

# Re-predict the test split with the tuned model (replaces the baseline y_pred).
y_pred = best_estimator.predict(X_test)

Best Hyperparameter for Classification: 
{'n_neighbors': 18}


In [43]:
# Accuracy of the current predictions on the test split. The format spec is
# '.2f' with no leading space: a space there is the sign flag and would pad
# positive values with an extra blank ("Accuracy:  0.97").
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

# Counts of true vs. predicted classes (rows: true label, columns: predicted).
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix: ")
print(conf_matrix)

# Per-class precision / recall / F1 plus the averaged summaries.
class_report = classification_report(y_test, y_pred)
print('Classification Report: ')
print(class_report)

Accuracy:  0.97
Confusion Matrix: 
[[51  3]
 [ 2 87]]
Classification Report: 
              precision    recall  f1-score   support

           0       0.96      0.94      0.95        54
           1       0.97      0.98      0.97        89

    accuracy                           0.97       143
   macro avg       0.96      0.96      0.96       143
weighted avg       0.97      0.97      0.96       143

