In [3]:
from sklearn.neighbors import KNeighborsClassifier
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# -------------------------------
# Load dataset
# -------------------------------
df = pd.read_csv("cardio_train_cleaned.csv")

X = df.drop("cardio", axis=1)
y = df["cardio"]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)
# 1️⃣ Train-Test Split Accuracy
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)

knn_tt_acc = accuracy_score(y_test, knn.predict(X_test))
print("KNN - Train-Test Accuracy:", knn_tt_acc)

# 2️⃣ K-Fold Accuracy
knn_kfold_acc = cross_val_score(knn, X, y, cv=5).mean()
print("KNN - K-Fold Accuracy:", knn_kfold_acc)

# 3️⃣ Hyperparameter Tuning Accuracy
params = {"n_neighbors": [3, 5, 7, 9]}

knn_grid = GridSearchCV(knn, params, cv=5)
knn_grid.fit(X_train, y_train)

knn_tuned_acc = accuracy_score(y_test, knn_grid.predict(X_test))
print("KNN - Tuned Accuracy:", knn_tuned_acc)


KNN - Train-Test Accuracy: 0.6884137028147501
KNN - K-Fold Accuracy: 0.6864071718242559
KNN - Tuned Accuracy: 0.7029602152883846
