In [127]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px

from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import GridSearchCV

In [128]:
# Remote CSV holding the heart dataset; `target` is the label column used below.
url = "https://raw.githubusercontent.com/digipodium/Datasets/main/classfication/heart.csv"

# Read the file straight from the URL and preview the first five rows.
df = pd.read_csv(filepath_or_buffer=url)
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [129]:
# Label vector: the `target` column.
y = df["target"]

# Feature matrix: every other column, standardised with StandardScaler.
# NOTE(review): the scaler is fitted on the full dataset before the
# cross-validated searches below, so each validation fold's statistics
# influence the scaling of its training folds. Wrapping scaler + estimator in
# a Pipeline would avoid this, but requires renaming the grid keys downstream.
scaler = StandardScaler()
X = scaler.fit_transform(df.drop(columns=["target"]))


In [130]:
# Base decision tree handed to the grid search; the fixed seed keeps runs repeatable.
clf = DecisionTreeClassifier(random_state=42)
# Decision-tree search space:
# 3 criteria x 2 splitters x 14 depths x 9 split thresholds = 756 candidates.
params = dict(
    criterion=["gini", "entropy", "log_loss"],
    splitter=["best", "random"],
    # 5..20 in small steps, then 50..500 in steps of 50.
    max_depth=[5, 10, 15, 20, *range(50, 501, 50)],
    # Every integer from 2 through 10.
    min_samples_split=list(range(2, 11)),
)

In [131]:
# Exhaustive search over `params` for the decision tree: 3-fold CV, all CPU
# cores, verbose progress logging, and train scores kept so they can be
# compared with the test scores later.
grid = GridSearchCV(clf, params, cv=3, n_jobs=-1, verbose=3, return_train_score=True)

In [132]:
# Run the decision-tree grid search on the scaled features and labels.
grid.fit(X,y)

Fitting 3 folds for each of 756 candidates, totalling 2268 fits


In [133]:
# Tabulate the per-candidate CV results (timings, parameters, per-split and
# mean scores, rank) as a DataFrame and display it.
grid_tree_results = pd.DataFrame(data=grid.cv_results_)
grid_tree_results

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_depth,param_min_samples_split,param_splitter,params,split0_test_score,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,split2_train_score,mean_train_score,std_train_score
0,0.001666,0.000471,0.000333,4.714827e-04,gini,5,2,best,"{'criterion': 'gini', 'max_depth': 5, 'min_sam...",0.801980,0.712871,0.673267,0.729373,0.053827,538,0.960396,0.920792,0.940594,0.940594,0.016168
1,0.001333,0.000471,0.000333,4.715951e-04,gini,5,2,random,"{'criterion': 'gini', 'max_depth': 5, 'min_sam...",0.841584,0.792079,0.752475,0.795380,0.036453,25,0.915842,0.900990,0.935644,0.917492,0.014195
2,0.002373,0.000930,0.000333,4.715951e-04,gini,5,3,best,"{'criterion': 'gini', 'max_depth': 5, 'min_sam...",0.801980,0.712871,0.673267,0.729373,0.053827,538,0.960396,0.915842,0.940594,0.938944,0.018227
3,0.001775,0.000189,0.000334,4.729438e-04,gini,5,3,random,"{'criterion': 'gini', 'max_depth': 5, 'min_sam...",0.831683,0.792079,0.752475,0.792079,0.032336,30,0.900990,0.900990,0.935644,0.912541,0.016336
4,0.001868,0.000234,0.000000,0.000000e+00,gini,5,4,best,"{'criterion': 'gini', 'max_depth': 5, 'min_sam...",0.792079,0.712871,0.673267,0.726073,0.049395,607,0.955446,0.915842,0.935644,0.935644,0.016168
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
751,0.002043,0.000030,0.000335,4.736182e-04,log_loss,500,8,random,"{'criterion': 'log_loss', 'max_depth': 500, 'm...",0.762376,0.881188,0.712871,0.785479,0.070630,87,0.905941,0.930693,0.940594,0.925743,0.014574
752,0.002042,0.000817,0.001038,8.631691e-04,log_loss,500,9,best,"{'criterion': 'log_loss', 'max_depth': 500, 'm...",0.742574,0.732673,0.712871,0.729373,0.012349,538,0.950495,0.930693,0.935644,0.938944,0.008414
753,0.001704,0.000500,0.000333,4.711456e-04,log_loss,500,9,random,"{'criterion': 'log_loss', 'max_depth': 500, 'm...",0.792079,0.831683,0.732673,0.785479,0.040689,87,0.896040,0.925743,0.910891,0.910891,0.012126
754,0.001790,0.000522,0.001083,9.209063e-04,log_loss,500,10,best,"{'criterion': 'log_loss', 'max_depth': 500, 'm...",0.742574,0.732673,0.693069,0.722772,0.021389,637,0.935644,0.930693,0.920792,0.929043,0.006174


In [134]:
# Order the candidates best-first by CV rank and keep each candidate's
# original position in the `index` column.
#
# The frame is rebuilt from `grid.cv_results_` rather than mutated in place so
# this cell is idempotent: with `inplace=True`, re-running it made
# `reset_index` insert an extra `level_0` column, and a further re-run raised
# "cannot insert level_0, already exists".
grid_tree_results = (
    pd.DataFrame(grid.cv_results_)
    .sort_values(by="rank_test_score")
    .reset_index()
)
grid_tree_results

Unnamed: 0,index,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_depth,param_min_samples_split,param_splitter,params,...,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,split2_train_score,mean_train_score,std_train_score
0,15,0.001001,1.123916e-07,0.001001,2.247832e-07,gini,5,9,random,"{'criterion': 'gini', 'max_depth': 5, 'min_sam...",...,0.841584,0.772277,0.818482,0.032672,1,0.905941,0.871287,0.891089,0.889439,0.014195
1,177,0.001334,4.705856e-04,0.000333,4.709208e-04,gini,300,9,random,"{'criterion': 'gini', 'max_depth': 300, 'min_s...",...,0.861386,0.752475,0.815182,0.045968,2,0.896040,0.915842,0.905941,0.905941,0.008084
2,231,0.002001,8.151725e-04,0.000000,0.000000e+00,gini,450,9,random,"{'criterion': 'gini', 'max_depth': 450, 'min_s...",...,0.861386,0.752475,0.815182,0.045968,2,0.896040,0.915842,0.905941,0.905941,0.008084
3,69,0.001666,4.721032e-04,0.000000,0.000000e+00,gini,20,9,random,"{'criterion': 'gini', 'max_depth': 20, 'min_sa...",...,0.861386,0.752475,0.815182,0.045968,2,0.896040,0.915842,0.905941,0.905941,0.008084
4,33,0.001667,4.709778e-04,0.000667,4.714829e-04,gini,10,9,random,"{'criterion': 'gini', 'max_depth': 10, 'min_sa...",...,0.861386,0.752475,0.815182,0.045968,2,0.896040,0.915842,0.905941,0.905941,0.008084
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
751,56,0.002334,4.714828e-04,0.000000,0.000000e+00,gini,20,3,best,"{'criterion': 'gini', 'max_depth': 20, 'min_sa...",...,0.712871,0.683168,0.706271,0.016828,744,0.985149,0.985149,0.995050,0.988449,0.004667
752,182,0.001668,4.725506e-04,0.000667,4.717079e-04,gini,350,3,best,"{'criterion': 'gini', 'max_depth': 350, 'min_s...",...,0.712871,0.683168,0.706271,0.016828,744,0.985149,0.985149,0.995050,0.988449,0.004667
753,146,0.002000,6.836514e-07,0.000333,4.713704e-04,gini,250,3,best,"{'criterion': 'gini', 'max_depth': 250, 'min_s...",...,0.712871,0.683168,0.706271,0.016828,744,0.985149,0.985149,0.995050,0.988449,0.004667
754,110,0.002000,8.104673e-07,0.000667,4.715390e-04,gini,150,3,best,"{'criterion': 'gini', 'max_depth': 150, 'min_s...",...,0.712871,0.683168,0.706271,0.016828,744,0.985149,0.985149,0.995050,0.988449,0.004667


In [135]:
# List the available result columns (used to pick the plot's y and hover fields).
grid_tree_results.columns

Index(['index', 'mean_fit_time', 'std_fit_time', 'mean_score_time',
       'std_score_time', 'param_criterion', 'param_max_depth',
       'param_min_samples_split', 'param_splitter', 'params',
       'split0_test_score', 'split1_test_score', 'split2_test_score',
       'mean_test_score', 'std_test_score', 'rank_test_score',
       'split0_train_score', 'split1_train_score', 'split2_train_score',
       'mean_train_score', 'std_train_score'],
      dtype='object')

In [136]:
# Mean test vs. mean train score for each decision-tree candidate, plotted
# against the frame's row index; hovering shows the candidate's parameters.
px.line(
    data_frame=grid_tree_results,
    y=["mean_test_score", "mean_train_score"],
    hover_data=[
        "param_criterion",
        "param_splitter",
        "param_max_depth",
        "param_min_samples_split",
    ],
    title="Decision Tree Grid Search",
)

Implementing KNeighborsClassifier


In [137]:
# Base k-nearest-neighbours estimator; its hyper-parameters come from the grid search.
clf2 = KNeighborsClassifier()
# KNN search space:
# 5 neighbour counts x 2 weightings x 4 algorithms x 4 metrics = 160 candidates.
# NOTE(review): 'minkowski' with the estimator's default power parameter is
# presumably equivalent to 'euclidean' - confirm against the scikit-learn docs.
params_knn = dict(
    n_neighbors=[3, 5, 7, 9, 10],
    weights=["uniform", "distance"],
    algorithm=["auto", "ball_tree", "kd_tree", "brute"],
    metric=["euclidean", "manhattan", "chebyshev", "minkowski"],
)

In [138]:
# Exhaustive search over `params_knn` for the KNN estimator: 3-fold CV, all
# CPU cores, verbose progress logging (train scores are not recorded here).
grid2 = GridSearchCV(clf2, params_knn, cv=3, n_jobs=-1, verbose=3)

In [139]:
# Run the KNN grid search on the same scaled features and labels.
grid2.fit(X,y)

Fitting 3 folds for each of 160 candidates, totalling 480 fits


In [140]:
# Tabulate the KNN CV results, order them best-first by rank, and keep each
# candidate's original position in the `index` column.
grid_knn_results = (
    pd.DataFrame(grid2.cv_results_)
    .sort_values(by="rank_test_score")
    .reset_index()
)
grid_knn_results

Unnamed: 0,index,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_algorithm,param_metric,param_n_neighbors,param_weights,params,split0_test_score,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score
0,138,0.000668,4.720449e-04,0.037007,4.667943e-02,brute,manhattan,10,uniform,"{'algorithm': 'brute', 'metric': 'manhattan', ...",0.841584,0.861386,0.782178,0.828383,0.033657,1
1,18,0.000999,0.000000e+00,0.004334,4.716514e-04,auto,manhattan,10,uniform,"{'algorithm': 'auto', 'metric': 'manhattan', '...",0.841584,0.861386,0.782178,0.828383,0.033657,1
2,98,0.001002,1.367303e-06,0.004999,2.170642e-06,kd_tree,manhattan,10,uniform,"{'algorithm': 'kd_tree', 'metric': 'manhattan'...",0.841584,0.861386,0.782178,0.828383,0.033657,1
3,58,0.001001,3.371748e-07,0.004335,1.247002e-03,ball_tree,manhattan,10,uniform,"{'algorithm': 'ball_tree', 'metric': 'manhatta...",0.841584,0.861386,0.782178,0.828383,0.033657,1
4,114,0.001000,1.072147e-06,0.004335,4.702464e-04,kd_tree,minkowski,7,uniform,"{'algorithm': 'kd_tree', 'metric': 'minkowski'...",0.851485,0.831683,0.762376,0.815182,0.038204,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
155,20,0.001667,4.711456e-04,0.003000,7.867412e-07,auto,chebyshev,3,uniform,"{'algorithm': 'auto', 'metric': 'chebyshev', '...",0.722772,0.752475,0.673267,0.716172,0.032672,155
156,100,0.001334,4.695188e-04,0.006001,3.591259e-06,kd_tree,chebyshev,3,uniform,"{'algorithm': 'kd_tree', 'metric': 'chebyshev'...",0.722772,0.752475,0.673267,0.716172,0.032672,155
157,140,0.000667,4.715390e-04,0.003000,3.371748e-07,brute,chebyshev,3,uniform,"{'algorithm': 'brute', 'metric': 'chebyshev', ...",0.732673,0.732673,0.673267,0.712871,0.028004,158
158,61,0.000503,4.087146e-04,0.002668,4.730012e-04,ball_tree,chebyshev,3,distance,"{'algorithm': 'ball_tree', 'metric': 'chebyshe...",0.722772,0.742574,0.673267,0.712871,0.029148,158


In [141]:
# Show the KNN result column names as a plain Python list.
print(list(grid_knn_results.columns))

['index', 'mean_fit_time', 'std_fit_time', 'mean_score_time', 'std_score_time', 'param_algorithm', 'param_metric', 'param_n_neighbors', 'param_weights', 'params', 'split0_test_score', 'split1_test_score', 'split2_test_score', 'mean_test_score', 'std_test_score', 'rank_test_score']


In [142]:
# Mean test score for each KNN candidate, plotted against the frame's row
# index; hovering shows the candidate's parameters.
px.line(
    data_frame=grid_knn_results,
    y="mean_test_score",
    hover_data=[
        "param_n_neighbors",
        "param_weights",
        "param_algorithm",
        "param_metric",
    ],
    title="KNN Grid Search",
)