In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder, StandardScaler, MinMaxScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold, GridSearchCV
from sklearn.metrics import ConfusionMatrixDisplay, precision_score, recall_score, f1_score, classification_report, roc_curve, roc_auc_score, log_loss

In [2]:
# Load the Kyphosis data set from a CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer='Kyphosis.csv')

# Preview the first two rows to confirm the column layout.
df.head(n=2)

Unnamed: 0,Kyphosis,Age,Number,Start
0,absent,71,3,5
1,absent,158,3,14


In [3]:
# One-hot encode the non-numeric columns. drop_first=True discards the first
# level of each encoded column, so the Kyphosis label is left as the single
# indicator column 'Kyphosis_present' (see the preview below).
dum_hr = pd.get_dummies(data=df, drop_first=True)

# Preview the encoded frame.
dum_hr.head(n=2)

Unnamed: 0,Age,Number,Start,Kyphosis_present
0,71,3,5,False
1,158,3,14,False


In [6]:
# Target: the encoded label column. Features: every other column.
y = dum_hr['Kyphosis_present']
X = dum_hr.drop(columns='Kyphosis_present')

# Hold out 30% of the rows; stratify=y keeps the class proportions the same
# in both parts, and the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=24,
)

In [10]:
# Estimator and the two candidate scalers tried by the search.
knn = KNeighborsClassifier()
scaler = StandardScaler()
mnx_scaler = MinMaxScaler()

# Two-step pipeline. The 'SCL' slot starts as None (no scaling step) and is
# swapped by the grid between: no scaling, standardisation, min-max scaling.
pipe = Pipeline(steps=[('SCL', None), ('KNN', knn)])
params = {
    'KNN__n_neighbors': np.arange(1, 40),   # k = 1 .. 39
    'SCL': [None, scaler, mnx_scaler],
}

# Shuffled, stratified 5-fold CV with a fixed seed so the scores are repeatable.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=24)

# Candidates are ranked by negative log loss (closer to zero is better).
# NOTE(review): the search is fitted on the full X, y rather than on
# X_train, y_train -- confirm this is intended before scoring on X_test.
gcv = GridSearchCV(
    estimator=pipe,
    param_grid=params,
    scoring='neg_log_loss',
    cv=kfold,
)
gcv.fit(X, y)

print(gcv.best_params_)
print(gcv.best_score_)

{'KNN__n_neighbors': 9, 'SCL': MinMaxScaler()}
-0.3541342613432673
