In [8]:
# 📘 06_hyperparameter_tuning.ipynb

# 📦 Step 1: Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import accuracy_score, classification_report
import os

# 📥 Step 2: Load cleaned data
df = pd.read_csv('../data/heart_cleaned.csv')
X = df.drop('target', axis=1)
y = df['target']

# 🔀 Step 3: Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 🌲 Step 4: Random Forest + Grid Search
print("✅ Tuning Random Forest...")
rf = RandomForestClassifier()
param_grid_rf = {
    'n_estimators': [50, 100],
    'max_depth': [10, 15],
    'min_samples_split': [2, 10],
    'min_samples_leaf': [1, 4],
    'bootstrap': [True, False]
}
rf_grid = GridSearchCV(rf, param_grid_rf, cv=5, n_jobs=-1, verbose=1)
rf_grid.fit(X_train, y_train)

best_rf = rf_grid.best_estimator_
y_pred_rf = best_rf.predict(X_test)
accuracy_rf = accuracy_score(y_test, y_pred_rf)
report_rf = classification_report(y_test, y_pred_rf)

print("\n✅ Random Forest Best Params:")
print(rf_grid.best_params_)
print("\n📈 Accuracy on Test:", accuracy_rf)
print("\n📄 Classification Report (Random Forest):")
print(report_rf)

# 🔍 Step 5: SVM + Grid Search
print("\n✅ Tuning SVM...")
svm = SVC()
param_grid_svm = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'rbf'],
    'gamma': ['scale', 'auto']
}
svm_grid = GridSearchCV(svm, param_grid_svm, cv=5, n_jobs=-1, verbose=1)
svm_grid.fit(X_train, y_train)

best_svm = svm_grid.best_estimator_
y_pred_svm = best_svm.predict(X_test)
accuracy_svm = accuracy_score(y_test, y_pred_svm)
report_svm = classification_report(y_test, y_pred_svm)

print("\n✅ SVM Best Params:")
print(svm_grid.best_params_)
print("\n📈 Accuracy on Test:", accuracy_svm)
print("\n📄 Classification Report (SVM):")
print(report_svm)

# 📄 Step 6: Save evaluation report (only metrics, no model)
os.makedirs("../results", exist_ok=True)
with open("../results/evaluation_metrics.txt", "w", encoding="utf-8") as f:
    f.write(f"📌 Model: Random Forest Classifier\n")
    f.write(f"Best Parameters: {rf_grid.best_params_}\n")
    f.write(f"Accuracy on Test: {accuracy_rf:.4f}\n\n")
    f.write(f"📄 Classification Report:\n{report_rf}\n\n")

    f.write(f"📌 Model: SVM Classifier\n")
    f.write(f"Best Parameters: {svm_grid.best_params_}\n")
    f.write(f"Accuracy on Test: {accuracy_svm:.4f}\n\n")
    f.write(f"📄 Classification Report:\n{report_svm}\n\n")

    f.write("📌 Notes:\n")
    f.write("- Dataset imbalance may have affected performance.\n")
    f.write("- Consider SMOTE or class_weight for improvements.\n")

print("✅ Evaluation metrics saved to: ../results/evaluation_metrics.txt")


✅ Tuning Random Forest...
Fitting 5 folds for each of 32 candidates, totalling 160 fits

✅ Random Forest Best Params:
{'bootstrap': False, 'max_depth': 15, 'min_samples_leaf': 4, 'min_samples_split': 10, 'n_estimators': 50}

📈 Accuracy on Test: 0.4918032786885246

📄 Classification Report (Random Forest):
              precision    recall  f1-score   support

           0       0.67      0.97      0.79        29
           1       0.14      0.08      0.11        12
           2       0.12      0.11      0.12         9
           3       0.00      0.00      0.00         7
           4       0.00      0.00      0.00         4

    accuracy                           0.49        61
   macro avg       0.19      0.23      0.20        61
weighted avg       0.36      0.49      0.41        61


✅ Tuning SVM...
Fitting 5 folds for each of 12 candidates, totalling 60 fits

✅ SVM Best Params:
{'C': 0.1, 'gamma': 'scale', 'kernel': 'linear'}

📈 Accuracy on Test: 0.5245901639344263

📄 Classification 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
