In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score
from sklearn.model_selection import ShuffleSplit
import os

import sys
sys.path.append("..")
from utils.serialization import save_model, load_model

### Dataset

In [2]:
X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, stratify=y)

### Grid Search on KNN

In [3]:
pipe_knn = Pipeline([('pca', PCA()), ('knn', KNeighborsClassifier())])
param_grid = [
    {
        'pca__n_components': range(1, X_train.shape[1] + 1),
        'knn__n_neighbors': range(1, 8)
    }
]
gs = GridSearchCV(
    estimator=pipe_knn,
    param_grid=param_grid,
    scoring='accuracy'
)

gs.fit(X_train, y_train)
# print(pd.DataFrame(gs.cv_results_).head(10))
print(f"best_params: {gs.best_params_}\nbest_acc: {gs.best_score_:.4f} %")

best_params: {'knn__n_neighbors': 6, 'pca__n_components': 5}
best_acc: 0.9298 %


### Evaluation

In [4]:
prediction = gs.predict(X_test)
acc = accuracy_score(y_test, prediction)
recall = recall_score(y_test, prediction, average='macro')
precision = precision_score(y_test, prediction, average='macro')
f1 = f1_score(y_test, prediction, average='macro')
print(f"accuracy: {acc * 100:.1f} %")
print(f"recall: {recall * 100:.1f} %")
print(f"precision: {precision * 100:.1f} %")
print(f"f1_score: {f1 * 100:.1f} %")

accuracy: 93.0 %
recall: 92.5 %
precision: 92.5 %
f1_score: 92.5 %


### Save Model

In [5]:
path = os.path.join(os.getcwd(), "trained_models/model_KNN.pickle")
save_model(path, gs)

### Load Model

In [6]:
path = os.path.join(os.getcwd(), "trained_models/model_KNN.pickle")
model = load_model(path)
model.predict(X_test)

array([1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1,
       1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0])