In [2]:
import pandas as pd
from sklearn.metrics import precision_score,accuracy_score,recall_score
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split

In [3]:
heart_df = pd.read_csv("../heart.csv")
x = heart_df.drop("target",axis=1) 
y = heart_df["target"]
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=42)
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)


In [4]:
knn_classifier = KNeighborsClassifier(n_neighbors=3)
knn_classifier.fit(x_train_scaled,y_train)


In [5]:
y_pred = knn_classifier.predict(x_test_scaled)

In [6]:
print("recall score: ",recall_score(y_test,y_pred))
print("accuracy score: ",accuracy_score(y_test,y_pred))
print("precision score: ", precision_score(y_test,y_pred))

recall score:  0.78125
accuracy score:  0.8524590163934426
precision score:  0.9259259259259259


In [7]:
from sklearn.model_selection import GridSearchCV
param_grid = {}
classfier = KNeighborsClassifier()
param_grid = {"n_neighbors":[3,5,6,9]}
classfierCV = GridSearchCV(classfier,param_grid,cv=5)
classfierCV.fit(x_test_scaled,y_test)
print("recall score: ",recall_score(y_test,y_pred))
print("accuracy score: ",accuracy_score(y_test,y_pred))
print("precision score: ", precision_score(y_test,y_pred))

recall score:  0.78125
accuracy score:  0.8524590163934426
precision score:  0.9259259259259259


In [8]:
res = pd.DataFrame(classfierCV.cv_results_)
print(res)

   mean_fit_time  std_fit_time  mean_score_time  std_score_time  \
0       0.001658      0.000547         0.003228        0.000391   
1       0.000844      0.000430         0.003444        0.001518   
2       0.001001      0.000001         0.003199        0.000979   
3       0.001474      0.000593         0.002202        0.000400   

  param_n_neighbors              params  split0_test_score  split1_test_score  \
0                 3  {'n_neighbors': 3}           0.923077           0.750000   
1                 5  {'n_neighbors': 5}           0.923077           0.833333   
2                 6  {'n_neighbors': 6}           0.923077           0.833333   
3                 9  {'n_neighbors': 9}           0.923077           0.833333   

   split2_test_score  split3_test_score  split4_test_score  mean_test_score  \
0           0.750000           0.833333                1.0         0.851282   
1           0.750000           0.833333                1.0         0.867949   
2           0.666667 

In [9]:
from sklearn.pipeline import Pipeline
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=42)
steps = [('scaler',StandardScaler()),('knn',KNeighborsClassifier())]
pipeline = Pipeline(steps)
param_grid = {"knn__n_neighbors":[3,5,6,9]}
classfierCV = GridSearchCV(pipeline,param_grid,cv=5,scoring="recall")
classfierCV.fit(x_train,y_train)
y_pred = classfierCV.predict(x_test)
print("recall score: " ,recall_score(y_test,y_pred) )
print("accuracy score: ",accuracy_score(y_test,y_pred))
print("precision score: ", precision_score(y_test,y_pred))


recall score:  0.875
accuracy score:  0.9016393442622951
precision score:  0.9333333333333333
