# 06 - Hyperparameter Tuning
Use GridSearchCV / RandomizedSearchCV to optimize models and save the best estimator.


In [1]:
# Change working directory to the project root so that the relative
# 'data/...' and 'models/...' paths used later in this notebook resolve.
import os

# The project location is machine-specific. Set the HEART_DISEASE_PROJECT_DIR
# environment variable to point at your own checkout; when it is unset or empty
# the original author's path is used, so existing behaviour is unchanged.
PROJECT_DIR = os.environ.get('HEART_DISEASE_PROJECT_DIR') or r'D:\Books and Courses\Machine Learning\Sprints x Microsoft Summer Camp - AI and Machine Learning\Comprehensive Machine Learning Full Pipeline on Heart Disease UCI Dataset (Graduation Project)\Heart_Disease_Project'
os.chdir(PROJECT_DIR)

In [2]:
import numpy as np, joblib
from pathlib import Path
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline

# Load cleaned training data
# The archive is read for two arrays: features under 'X' and labels under 'y'.
train_npz = Path('data/cleaned_train.npz')

# np.load on an .npz returns an NpzFile that keeps the underlying archive open.
# Indexing it reads the array into memory, so the file can be closed as soon
# as both arrays have been pulled out.
with np.load(train_npz) as arr:
    X, y = arr['X'], arr['y']

# Fail early with a clear message instead of deep inside the CV search.
if len(X) != len(y):
    raise ValueError(
        f"{train_npz}: X has {len(X)} rows but y has {len(y)} labels"
    )

# Random Forest: exhaustive grid search.
# 3 x 3 x 3 = 27 parameter combinations, each scored by F1 with cv=5.
forest = RandomForestClassifier(random_state=42)
forest_grid = {
    'n_estimators': [200, 300, 500],
    'max_depth': [None, 5, 10],
    'min_samples_split': [2, 5, 10],
}
gs = GridSearchCV(
    estimator=forest,
    param_grid=forest_grid,
    scoring='f1',
    cv=5,
    n_jobs=-1,  # use every available core for the CV fits
)
gs.fit(X, y)
print('RF best:', gs.best_params_, gs.best_score_)

# RBF-kernel SVM: randomized search.
# 30 draws from a 20 x 20 log-spaced grid over C and gamma, scored by F1 with cv=5.
# probability=True is kept so the fitted estimator exposes predict_proba.
svm_clf = SVC(kernel='rbf', probability=True, random_state=42)
svm_space = {
    'C': np.logspace(-2, 2, 20),       # 1e-2 ... 1e2
    'gamma': np.logspace(-3, 1, 20),   # 1e-3 ... 1e1
}
rs = RandomizedSearchCV(
    estimator=svm_clf,
    param_distributions=svm_space,
    n_iter=30,
    scoring='f1',
    cv=5,
    n_jobs=-1,
    random_state=42,  # fixes which candidates are sampled
)
rs.fit(X, y)
print('SVM best:', rs.best_params_, rs.best_score_)

# Keep whichever search reached the higher mean CV F1 and persist its
# refitted estimator. On a tie the Random Forest result is kept.
if gs.best_score_ >= rs.best_score_:
    best_model = gs.best_estimator_
else:
    best_model = rs.best_estimator_

joblib.dump(best_model, 'models/best_model.pkl')
print('Saved tuned best model to models/best_model.pkl')

# Bundle the previously saved preprocessor with the selected estimator into a
# single Pipeline object for deployment. Nothing is refit here: the pipeline
# just chains the two objects as loaded/selected.
# NOTE(review): assumes models/preprocessor.pkl holds an already-fitted
# transformer matching the features in the training archive; confirm upstream.
preproc_path = Path("models/preprocessor.pkl")
preprocessor = joblib.load(preproc_path)

pipeline_steps = [
    ("preprocessor", preprocessor),
    ("model", best_model),
]
full_pipeline = Pipeline(steps=pipeline_steps)

# Persist the combined preprocessing + model artifact.
joblib.dump(full_pipeline, "models/final_model_(full_pipeline).pkl")
print("Saved full pipeline (preprocessing + model) to models/final_model_(full_pipeline).pkl")


RF best: {'max_depth': None, 'min_samples_split': 10, 'n_estimators': 200} 0.8072703782006109
SVM best: {'gamma': 0.001, 'C': 8.858667904100823} 0.8158881741712024
Saved tuned best model to models/best_model.pkl
Saved full pipeline (preprocessing + model) to models/final_model_(full_pipeline).pkl
