# 2.6 Hyperparameter Tuning

**Random Forest Example with GridSearchCV**

In [113]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, roc_auc_score

# Search space for the forest: 3 * 4 * 3 * 3 = 108 hyperparameter combinations.
param_grid_rf = dict(
    n_estimators=[100, 200, 300],
    max_depth=[None, 5, 10, 15],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)

# Exhaustive search over the grid. Every combination is scored by ROC AUC
# under 5-fold cross-validation, with all available cores used (n_jobs=-1).
# The fixed random_state keeps the forests reproducible between runs.
grid_rf = GridSearchCV(
    RandomForestClassifier(random_state=42),
    param_grid_rf,
    scoring='roc_auc',
    cv=5,
    n_jobs=-1,
)
grid_rf.fit(X_train, y_train)

# Report the winning combination and its mean cross-validated AUC.
print("✅ Best RF Parameters:", grid_rf.best_params_)
print("Best AUC Score (CV):", grid_rf.best_score_)

# Held-out evaluation with the estimator the search selected.
# Column 1 of predict_proba is the probability of the second entry in
# classes_ (assumed to be the positive label -- TODO confirm the encoding).
best_rf = grid_rf.best_estimator_
y_prob_rf = best_rf.predict_proba(X_test)[:, 1]
y_pred_rf = best_rf.predict(X_test)

print("Test Accuracy:", accuracy_score(y_test, y_pred_rf))
print("Test AUC:", roc_auc_score(y_test, y_prob_rf))


✅ Best RF Parameters: {'max_depth': None, 'min_samples_leaf': 4, 'min_samples_split': 2, 'n_estimators': 200}
Best AUC Score (CV): 0.902979020979021
Test Accuracy: 0.8333333333333334
Test AUC: 0.921875


**SVM Example with RandomizedSearchCV**

In [114]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.svm import SVC
from scipy.stats import uniform

# Sampling space for the SVM search.
# NOTE: scipy's uniform(loc, scale) covers [loc, loc + scale], so C is drawn
# from [0.1, 10.1] here, not [0.1, 10].
param_dist_svm = dict(
    C=uniform(loc=0.1, scale=10),
    gamma=['scale', 'auto'],
    kernel=['rbf', 'linear', 'poly'],
)

# Randomized search: 20 sampled configurations, each scored by ROC AUC under
# 5-fold cross-validation. random_state is fixed on both the estimator and
# the search so the sampled candidates are reproducible.
rand_svm = RandomizedSearchCV(
    SVC(probability=True, random_state=42),
    param_dist_svm,
    n_iter=20,
    scoring='roc_auc',
    cv=5,
    random_state=42,
    n_jobs=-1,
)
rand_svm.fit(X_train, y_train)

# Report the winning configuration and its mean cross-validated AUC.
print("✅ Best SVM Parameters:", rand_svm.best_params_)
print("Best AUC Score (CV):", rand_svm.best_score_)

# Held-out evaluation with the estimator the search selected.
# probability=True above is what makes predict_proba available on the SVC.
best_svm = rand_svm.best_estimator_
y_prob_svm = best_svm.predict_proba(X_test)[:, 1]
y_pred_svm = best_svm.predict(X_test)

print("Test Accuracy:", accuracy_score(y_test, y_pred_svm))
print("Test AUC:", roc_auc_score(y_test, y_prob_svm))


✅ Best SVM Parameters: {'C': 9.588855372533333, 'gamma': 'auto', 'kernel': 'linear'}
Best AUC Score (CV): 0.8964475524475525
Test Accuracy: 0.85
Test AUC: 0.9162946428571428


# Saving the Model

In [116]:
import os
import joblib
from sklearn.base import clone
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Make sure the output folder exists before writing into it.
os.makedirs("models", exist_ok=True)

# Random Forest pipeline: scaler followed by the tuned forest.
# Pipeline does not copy its steps, so passing `best_rf` itself would make
# the fit below retrain `grid_rf.best_estimator_` in place on scaled
# features, silently changing the model evaluated earlier in the notebook.
# clone() yields an unfitted estimator with the same hyperparameters.
pipeline_rf = Pipeline([
    ('scaler', StandardScaler()),
    ('model', clone(best_rf))
])

# Fitting is required, not optional: both the scaler and the cloned model
# are unfitted at this point.
pipeline_rf.fit(X_train, y_train)

# Persist the fitted pipeline. joblib files are pickle-based, so only load
# them back from a trusted location.
joblib.dump(pipeline_rf, "models/heart_disease_rf_pipeline.pkl")
print("✅ Random Forest model pipeline saved successfully!")


✅ Random Forest model pipeline saved successfully!


In [117]:
# Imported here so this cell can build an independent copy of the tuned SVM.
from sklearn.base import clone

# Make sure the output folder exists (no-op if it is already there).
os.makedirs("models", exist_ok=True)

# SVM pipeline: scaler followed by the tuned SVM.
# Pipeline does not copy its steps, so passing `best_svm` itself would make
# the fit below retrain `rand_svm.best_estimator_` in place on scaled
# features, silently changing the model evaluated earlier in the notebook.
# clone() yields an unfitted estimator with the same hyperparameters.
# NOTE(review): the hyperparameters were selected by `rand_svm` without this
# scaler step. If X_train is not already standardized, they were tuned on a
# differently scaled problem -- consider running the search on the whole
# pipeline instead.
pipeline_svm = Pipeline([
    ('scaler', StandardScaler()),   # SVMs are sensitive to feature scale
    ('model', clone(best_svm))      # fresh copy with the tuned hyperparameters
])

# Fit the scaler and the SVM together on the training data.
pipeline_svm.fit(X_train, y_train)

# Persist the fitted pipeline. joblib files are pickle-based, so only load
# them back from a trusted location.
joblib.dump(pipeline_svm, "models/heart_disease_svm_pipeline.pkl")
print("✅ SVM model pipeline saved successfully!")


✅ SVM model pipeline saved successfully!
