In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score , precision_score , recall_score
from sklearn.neighbors import KNeighborsClassifier

In [2]:
heart = pd.read_csv("heart.csv")
heart.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [3]:
X = heart.drop(["target"] , axis = 1)
y = heart["target"]

X_train , X_test , y_train , y_test = train_test_split(
    X , y , test_size = 0.2 , random_state = 42
)

In [4]:
from sklearn.preprocessing import StandardScaler

scalar = StandardScaler()
X_train = scalar.fit_transform(X_train)
X_test = scalar.transform(X_test)

In [5]:
k = [3,5,7,11,13]
for no in k:
    model = KNeighborsClassifier(n_neighbors = no)
    model.fit(X_train , y_train)

    y_pred = model.predict(X_test)

    print("accuracy :",accuracy_score(y_test , y_pred))
    print("precision :",precision_score(y_test , y_pred))
    print("recall :",recall_score(y_test , y_pred))

accuracy : 0.8524590163934426
precision : 0.9259259259259259
recall : 0.78125
accuracy : 0.9016393442622951
precision : 0.9333333333333333
recall : 0.875
accuracy : 0.9180327868852459
precision : 0.9354838709677419
recall : 0.90625
accuracy : 0.8852459016393442
precision : 0.9032258064516129
recall : 0.875
accuracy : 0.8852459016393442
precision : 0.8787878787878788
recall : 0.90625


In [9]:
#cross validation
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier

classifier = KNeighborsClassifier()
param_grid = {"n_neighbors" : [3,5,7,9]}
model = GridSearchCV(
    classifier,
    param_grid,
    cv = 5,
    scoring = "recall"
)

model.fit(X_train , y_train)
y_pred = model.predict(X_test)

In [16]:
 res = model.cv_results_

In [18]:
pd.DataFrame(res["params"] , res["mean_test_score"])

Unnamed: 0,n_neighbors
0.864387,3
0.85755,5
0.871795,7
0.85698,9


In [20]:
model.best_params_

{'n_neighbors': 7}

In [41]:
#pipeline 
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

pipeline = Pipeline([
    ("scalar" , StandardScaler()),
    ("knn" , KNeighborsClassifier())
])

X = heart.drop(["target"] , axis = 1)
y = heart["target"]
X_train , X_test , y_train , y_test = train_test_split(
    X , y , test_size = 0.2 , random_state = 42
)

param_grid = {"knn__n_neighbors" : [3,5,7,9]}
model = GridSearchCV(
    pipeline,
    param_grid,
    cv = 5,
    scoring = "recall"
)

model.fit(X_train , y_train)
y_pred = model.predict(X_test)

print(recall_score(y_test , y_pred))

0.90625


In [42]:
print(model.best_params_)

{'knn__n_neighbors': 7}
