In [1]:
# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [None]:
# Load the dataset
file_path = 'framingham.csv' 
df = pd.read_csv(file_path)

# Visualize relationships between variables        
# Correlation heatmap
plt.figure(figsize=(12, 8))
sns.heatmap(df.corr(), annot=True, cmap='coolwarm', fmt='.2f')
plt.savefig('correlation_heatmap.png')  # Save the heatmap to a file
plt.close()

In [4]:
# Handle missing values
df['education'] = df['education'].fillna(df['education'].mode()[0])
df['cigsPerDay'] = df['cigsPerDay'].fillna(df['cigsPerDay'].median())
df['BPMeds'] = df['BPMeds'].fillna(df['BPMeds'].median())
df['totChol'] = df['totChol'].fillna(df['totChol'].median())
df['BMI'] = df['BMI'].fillna(df['BMI'].median())
df['heartRate'] = df['heartRate'].fillna(df['heartRate'].median())
df['glucose'] = df['glucose'].fillna(df['glucose'].median())

# Normalize the numerical features
X = df.drop(columns=['TenYearCHD'])
y = df['TenYearCHD']

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [7]:
# Split the dataset into training and testing sets (80/20 split)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Try different values of K (e.g., K=3, 5, 7)
k_values = [3, 5, 7]
results = {}

# Train and evaluate the models
for k in k_values:
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, y_train)
    
    y_pred = knn.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    
    results[k] = {
        'accuracy': accuracy,
        'confusion_matrix': confusion_matrix(y_test, y_pred),
        'classification_report': classification_report(y_test, y_pred)
    }
    
     # Print results for each K model
    print(f"Results for K={k}:")
    print(f"Accuracy: {results[k]['accuracy']}")
    print("Confusion Matrix:")
    print(results[k]['confusion_matrix'])
    print("Classification Report:")
    print(results[k]['classification_report'])
    print("="*50)

Results for K=3:
Accuracy: 0.8136792452830188
Confusion Matrix:
[[676  49]
 [109  14]]
Classification Report:
              precision    recall  f1-score   support

           0       0.86      0.93      0.90       725
           1       0.22      0.11      0.15       123

    accuracy                           0.81       848
   macro avg       0.54      0.52      0.52       848
weighted avg       0.77      0.81      0.79       848

Results for K=5:
Accuracy: 0.8301886792452831
Confusion Matrix:
[[698  27]
 [117   6]]
Classification Report:
              precision    recall  f1-score   support

           0       0.86      0.96      0.91       725
           1       0.18      0.05      0.08       123

    accuracy                           0.83       848
   macro avg       0.52      0.51      0.49       848
weighted avg       0.76      0.83      0.79       848

Results for K=7:
Accuracy: 0.8325471698113207
Confusion Matrix:
[[702  23]
 [119   4]]
Classification Report:
              pr

In [6]:
# Automatically choose the best model based on highest accuracy
best_k = max(results, key=lambda k: results[k]['accuracy'])

print(f"The best model is with K = {best_k}.")
print(f"Accuracy: {results[best_k]['accuracy']}")
print("Confusion Matrix:")
print(results[best_k]['confusion_matrix'])
print("Classification Report:")
print(results[best_k]['classification_report'])

The best model is with K = 7.
Accuracy: 0.8325471698113207
Confusion Matrix:
[[702  23]
 [119   4]]
Classification Report:
              precision    recall  f1-score   support

           0       0.86      0.97      0.91       725
           1       0.15      0.03      0.05       123

    accuracy                           0.83       848
   macro avg       0.50      0.50      0.48       848
weighted avg       0.75      0.83      0.78       848

