In [4]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

In [5]:
# Load the diabetes dataset from a CSV file located in the working directory.
df = pd.read_csv('diabetes.csv')

# Preview the first five rows as a quick sanity check of columns and values.
# NOTE(review): the preview shows 0 for SkinThickness and Insulin in some
# rows -- presumably placeholders for missing measurements; confirm before
# treating them as real values.
df.head(n=5)

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [6]:
# Split the frame into the feature matrix (X) and the target vector (y).
# The target is selected by its column name instead of by position, so the
# split stays correct if the CSV's columns are ever reordered or extended,
# and a missing 'Outcome' column fails loudly with a KeyError rather than
# silently training on the wrong column.
X = df.drop(columns='Outcome')
y = df['Outcome']

In [7]:
# Logistic-regression model: standardise the features, then fit the classifier.
# Keeping the scaler inside the pipeline means it is fitted together with the
# classifier on whatever data the pipeline is given.
# random_state is pinned because the liblinear solver uses it to shuffle the
# data; without a fixed seed repeated runs are not guaranteed to reproduce
# the same fitted coefficients and scores.
pipeline = make_pipeline(
    StandardScaler(),
    LogisticRegression(solver='liblinear', random_state=42),
)

In [8]:
# Hyper-parameter grid for the logistic-regression pipeline.
# Keys follow the "<step name>__<parameter>" convention so they are routed to
# the 'logisticregression' step of the pipeline.
penalty_options = ['l1', 'l2']

# NOTE: despite its name, alpha_value is supplied as LogisticRegression's C
# (in scikit-learn, C is the inverse of the regularisation strength).
alpha_value = [0.1, 0.5, 1.0, 2.0, 5.0, 10.0]

parameters = {
    'logisticregression__C': alpha_value,
    'logisticregression__penalty': penalty_options,
}

In [9]:
# Exhaustive search over the logistic-regression grid, scoring every
# candidate by accuracy with 5-fold cross-validation.
grid = GridSearchCV(
    estimator=pipeline,
    param_grid=parameters,
    scoring='accuracy',
    cv=5,
)

# Fit on the full dataset; as the cell's last expression, the fitted search
# object is what the notebook displays.
grid.fit(X, y)

GridSearchCV(cv=5,
             estimator=Pipeline(steps=[('standardscaler', StandardScaler()),
                                       ('logisticregression',
                                        LogisticRegression(solver='liblinear'))]),
             param_grid={'logisticregression__C': [0.1, 0.5, 1.0, 2.0, 5.0,
                                                   10.0],
                         'logisticregression__penalty': ['l1', 'l2']},
             scoring='accuracy')

In [10]:
# Best mean cross-validated accuracy found by the logistic-regression search.
grid.best_score_

0.7708853238265002

In [11]:
# Parameter combination that produced the best cross-validated accuracy
# in the logistic-regression search.
grid.best_params_

{'logisticregression__C': 0.5, 'logisticregression__penalty': 'l2'}

In [12]:
# Second model: standardise the features, then classify with k-nearest
# neighbours. This rebinds `pipeline`, replacing the logistic-regression
# pipeline defined earlier in the notebook.
pipeline = make_pipeline(
    StandardScaler(),
    KNeighborsClassifier(),
)

In [13]:
# Hyper-parameter grid for the k-nearest-neighbours pipeline.
# Keys follow the "<step name>__<parameter>" convention so they are routed to
# the 'kneighborsclassifier' step of the pipeline.
neighbors = [3, 5, 7, 9, 11, 15, 17, 21]
weights = ['uniform', 'distance']

# NOTE(review): `algorithm` chooses how neighbours are looked up; it
# presumably does not change the predictions, so searching over it likely
# multiplies the number of fits without affecting accuracy -- confirm and
# consider dropping it from the grid.
algorithm = ['auto', 'ball_tree', 'kd_tree', 'brute']

parameters = {
    'kneighborsclassifier__n_neighbors': neighbors,
    'kneighborsclassifier__weights': weights,
    'kneighborsclassifier__algorithm': algorithm,
}

In [14]:
# Exhaustive search over the k-nearest-neighbours grid, scoring every
# candidate by accuracy with 5-fold cross-validation. This rebinds `grid`,
# so the results of the earlier search are no longer reachable by that name.
grid = GridSearchCV(
    estimator=pipeline,
    param_grid=parameters,
    scoring='accuracy',
    cv=5,
)

# Fit on the full dataset; as the cell's last expression, the fitted search
# object is what the notebook displays.
grid.fit(X, y)

GridSearchCV(cv=5,
             estimator=Pipeline(steps=[('standardscaler', StandardScaler()),
                                       ('kneighborsclassifier',
                                        KNeighborsClassifier())]),
             param_grid={'kneighborsclassifier__algorithm': ['auto',
                                                             'ball_tree',
                                                             'kd_tree',
                                                             'brute'],
                         'kneighborsclassifier__n_neighbors': [3, 5, 7, 9, 11,
                                                               15, 17, 21],
                         'kneighborsclassifier__weights': ['uniform',
                                                           'distance']},
             scoring='accuracy')

In [15]:
# Best mean cross-validated accuracy found by the k-nearest-neighbours search.
grid.best_score_

0.7695866225277991

In [16]:
# Parameter combination that produced the best cross-validated accuracy
# in the k-nearest-neighbours search.
grid.best_params_

{'kneighborsclassifier__algorithm': 'auto',
 'kneighborsclassifier__n_neighbors': 17,
 'kneighborsclassifier__weights': 'uniform'}

In [17]:
# Tabulate the search results, one row per candidate: fit/score timings,
# the parameter values tried, per-split test scores, their mean/std, and rank.
pd.DataFrame(grid.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_kneighborsclassifier__algorithm,param_kneighborsclassifier__n_neighbors,param_kneighborsclassifier__weights,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.004801,0.000751,0.007000,0.000894,auto,3,uniform,"{'kneighborsclassifier__algorithm': 'auto', 'k...",0.720779,0.714286,0.740260,0.790850,0.745098,0.742254,0.026896,33
1,0.003602,0.000492,0.003198,0.000401,auto,3,distance,"{'kneighborsclassifier__algorithm': 'auto', 'k...",0.720779,0.714286,0.733766,0.784314,0.745098,0.739649,0.024733,41
2,0.003601,0.000490,0.005999,0.000633,auto,5,uniform,"{'kneighborsclassifier__algorithm': 'auto', 'k...",0.733766,0.733766,0.714286,0.777778,0.718954,0.735710,0.022437,53
3,0.003600,0.000488,0.003001,0.000002,auto,5,distance,"{'kneighborsclassifier__algorithm': 'auto', 'k...",0.727273,0.727273,0.714286,0.777778,0.725490,0.734420,0.022213,61
4,0.006198,0.004403,0.006734,0.000745,auto,7,uniform,"{'kneighborsclassifier__algorithm': 'auto', 'k...",0.733766,0.707792,0.785714,0.758170,0.725490,0.742187,0.027140,37
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59,0.003504,0.000447,0.003199,0.000401,brute,15,distance,"{'kneighborsclassifier__algorithm': 'brute', '...",0.759740,0.759740,0.746753,0.810458,0.771242,0.769587,0.021856,5
60,0.006022,0.004042,0.007296,0.000388,brute,17,uniform,"{'kneighborsclassifier__algorithm': 'brute', '...",0.759740,0.740260,0.766234,0.810458,0.771242,0.769587,0.022984,1
61,0.003400,0.000491,0.003800,0.000400,brute,17,distance,"{'kneighborsclassifier__algorithm': 'brute', '...",0.759740,0.740260,0.753247,0.810458,0.771242,0.766989,0.023931,9
62,0.003200,0.000400,0.007400,0.000800,brute,21,uniform,"{'kneighborsclassifier__algorithm': 'brute', '...",0.759740,0.707792,0.759740,0.777778,0.790850,0.759180,0.028250,21
