In [35]:
import pandas as pd
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
import joblib
from sklearn.pipeline import Pipeline


In [36]:
X_selected = pd.read_csv('../data/heart_disease_selected_features.csv')
y = pd.read_csv('../data/heart_disease.csv')['target']

In [37]:
rf_param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5]
}
svm_param_grid = {
    'C': [0.1, 1, 10],
    'kernel': ['rbf', 'linear']
}

In [38]:
rf = RandomForestClassifier(random_state=42)
rf_grid = GridSearchCV(rf, rf_param_grid, cv=5, scoring='f1_macro', n_jobs=-1)
rf_grid.fit(X_selected, y)

In [39]:
svm = SVC(probability=True, random_state=42)
svm_random = RandomizedSearchCV(svm, svm_param_grid, n_iter=10, cv=5, scoring='f1_macro', random_state=42, n_jobs=-1)
svm_random.fit(X_selected, y)



In [40]:
joblib.dump(rf_grid.best_estimator_, '../models/random_forest_optimized.pkl')
joblib.dump(svm_random.best_estimator_, '../models/svm_optimized.pkl')

['../models/svm_optimized.pkl']

In [41]:
results = pd.DataFrame({
    'Model': ['Random Forest', 'SVM'],
    'Best Score (F1-Macro)': [rf_grid.best_score_, svm_random.best_score_],
    'Best Params': [rf_grid.best_params_, svm_random.best_params_]
})
results.to_csv('../results/hyperparameter_tuning_results.csv', index=False)

print("Hyperparameter tuning completed.")
print("Best Random Forest Score (F1-Macro):", rf_grid.best_score_)
print("Best Random Forest Params:", rf_grid.best_params_)
print("Best SVM Score (F1-Macro):", svm_random.best_score_)
print("Best SVM Params:", svm_random.best_params_)
print("Optimized models saved in 'models/' directory.")
print("Tuning results saved in 'results/hyperparameter_tuning_results.csv'.")

Hyperparameter tuning completed.
Best Random Forest Score (F1-Macro): 0.2982922878291299
Best Random Forest Params: {'max_depth': 10, 'min_samples_split': 2, 'n_estimators': 300}
Best SVM Score (F1-Macro): 0.3303220909809145
Best SVM Params: {'kernel': 'rbf', 'C': 10}
Optimized models saved in 'models/' directory.
Tuning results saved in 'results/hyperparameter_tuning_results.csv'.


In [42]:
preprocessor = joblib.load('../models/preprocessor.pkl')
best_model = joblib.load('../models/random_forest_optimized.pkl')

final_pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', best_model)
])

joblib.dump(final_pipeline, '../models/final_model.pkl')

print("Model pipeline (preprocessor + optimized Random Forest) saved as 'models/final_model.pkl'.")

Model pipeline (preprocessor + optimized Random Forest) saved as 'models/final_model.pkl'.
