In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score , precision_score , recall_score
from sklearn.preprocessing import StandardScaler # Scaling is most important in 
from sklearn.neighbors import KNeighborsClassifier

In [5]:
heart_df = pd.read_csv("heart.csv")
heart_df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [6]:
X = heart_df.drop("target", axis = 1)
y = heart_df["target"]

In [7]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size = 0.2, random_state = 42
)


In [12]:
Scaler = StandardScaler() 
X_train_scaled = Scaler.fit_transform(X_train)
X_test_scaled = Scaler.transform(X_test)

In [19]:
#for k = 3 (Hyperparameter)
knn_classifier = KNeighborsClassifier(n_neighbors = 3)
knn_classifier.fit(X_train_scaled, y_train)

y_pred = knn_classifier.predict(X_test_scaled)

print("Accuracy score :",accuracy_score(y_test, y_pred))
print("precision score :",precision_score(y_test, y_pred))
print("recall score :",recall_score(y_test, y_pred))

Accuracy score : 0.8524590163934426
precision score : 0.9259259259259259
recall score : 0.78125


In [20]:
#for k = 5 , here recall improved. we can even check for different k values
knn_classifier = KNeighborsClassifier(n_neighbors = 5)
knn_classifier.fit(X_train_scaled, y_train)

y_pred = knn_classifier.predict(X_test_scaled)

print("Accuracy score :",accuracy_score(y_test, y_pred))
print("precision score :",precision_score(y_test, y_pred))
print("recall score :",recall_score(y_test, y_pred))

Accuracy score : 0.9016393442622951
precision score : 0.9333333333333333
recall score : 0.875


In [23]:
# for k = 7
knn_classifier = KNeighborsClassifier(n_neighbors = 7)
knn_classifier.fit(X_train_scaled, y_train)

y_pred = knn_classifier.predict(X_test_scaled)

print("Accuracy score :",accuracy_score(y_test, y_pred))
print("precision score :",precision_score(y_test, y_pred))
print("recall score :",recall_score(y_test, y_pred))

Accuracy score : 0.9180327868852459
precision score : 0.9354838709677419
recall score : 0.90625


In [38]:
# Cross Validation for hhyperparameter tuning using Grid Search CV
from sklearn.model_selection import GridSearchCV
classifier = KNeighborsClassifier()
param_grid = {"n_neighbors" : [3, 5, 7, 9]}
classifierCV = GridSearchCV(
    classifier,
    param_grid,
    cv = 5, # cv = no. of folds we want 
)

classifierCV.fit(X_train_scaled, y_train)

y_pred = knn_classifier.predict(X_test_scaled)

print("Accuracy score :",accuracy_score(y_test, y_pred))
print("precision score :",precision_score(y_test, y_pred))
print("recall score :",recall_score(y_test, y_pred))

#result
res = pd.DataFrame(classifierCV.cv_results_)
print(res[["param_n_neighbors", "mean_test_score"]] )

print(classifierCV.best_params_) # print best value

Accuracy score : 0.9180327868852459
precision score : 0.9354838709677419
recall score : 0.90625
   param_n_neighbors  mean_test_score
0                  3         0.805782
1                  5         0.814116
2                  7         0.801616
3                  9         0.801786
{'n_neighbors': 5}
