In [18]:
# Importing the necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.metrics import roc_curve, roc_auc_score, precision_recall_curve, auc

# Loading the dataset
data = pd.read_csv('heart.csv')

# Separating features (X) and the target variable (y)
X = data.drop('target', axis=1)
y = data['target']

# Define a list of k values to test
k_values = [3, 5, 7]

# Create a dictionary to store the results for each k
results = {}

# Perform cross-validation and evaluation for each value of k
for k in k_values:
    # Create a KNN classifier
    knn_classifier = KNeighborsClassifier(n_neighbors=k)

    # Perform cross-validation with 5 folds
    cross_val_scores = cross_val_score(knn_classifier, X, y, cv=5)
    mean_score = cross_val_scores.mean()

    # Split the dataset for final evaluation
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    knn_classifier.fit(X_train, y_train)  # Corrected line
    y_pred = knn_classifier.predict(X_test)

    # Calculate additional metrics
    accuracy = accuracy_score(y_test, y_pred)
    confusion_matrix_result = confusion_matrix(y_test, y_pred)
    classification_report_result = classification_report(y_test, y_pred)

    # Calculate ROC curve and AUC
    fpr, tpr, thresholds = roc_curve(y_test, knn_classifier.predict_proba(X_test)[:, 1])
    roc_auc = auc(fpr, tpr)

    # Store the results in the dictionary
    results[k] = {
        "Cross-Validation Mean Score": mean_score,
        "Accuracy": accuracy,
        "Confusion Matrix": confusion_matrix_result,
        "Classification Report": classification_report_result,
        "ROC AUC": roc_auc,
    }

# Find the best k based on mean cross-validation score
best_k = max(results, key=lambda k: results[k]["Cross-Validation Mean Score"])

# Print the results for all values of k
for k in k_values:
    print(f'KNN (k={k})')
    print(f'Cross-Validation Mean Score: {results[k]["Cross-Validation Mean Score"]}')
    print(f'Test Set Accuracy: {results[k]["Accuracy"]}')
    print('Confusion Matrix:\n', results[k]['Confusion Matrix'])
    print('Classification Report:\n', results[k]['Classification Report'])
    print('ROC AUC:', results[k]['ROC AUC'])
    print('\n')

# Print the best k and its results
print(f'Best k: {best_k}')
print('Performance Metrics for the Best k:')
best_results = results[best_k]
print(f'Cross-Validation Mean Score: {best_results["Cross-Validation Mean Score"]}')
print(f'Test Set Accuracy: {best_results["Accuracy"]}')
print('Confusion Matrix:\n', best_results['Confusion Matrix'])
print('Classification Report:\n', best_results['Classification Report'])
print('ROC AUC:', best_results['ROC AUC'])


KNN (k=3)
Cross-Validation Mean Score: 0.8907317073170733
Test Set Accuracy: 0.9024390243902439
Confusion Matrix:
 [[91 11]
 [ 9 94]]
Classification Report:
               precision    recall  f1-score   support

           0       0.91      0.89      0.90       102
           1       0.90      0.91      0.90       103

    accuracy                           0.90       205
   macro avg       0.90      0.90      0.90       205
weighted avg       0.90      0.90      0.90       205

ROC AUC: 0.9586426803731201


KNN (k=5)
Cross-Validation Mean Score: 0.7473170731707317
Test Set Accuracy: 0.7317073170731707
Confusion Matrix:
 [[74 28]
 [27 76]]
Classification Report:
               precision    recall  f1-score   support

           0       0.73      0.73      0.73       102
           1       0.73      0.74      0.73       103

    accuracy                           0.73       205
   macro avg       0.73      0.73      0.73       205
weighted avg       0.73      0.73      0.73       205

