In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Read the selected-features table, then separate the label column from the predictors
df = pd.read_csv("../data/heart_disease_selected.csv")
y = df["target"]
X = df.drop(columns="target")

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [8]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Candidate hyperparameter values the randomized search samples from
svm_params = dict(
    C=[0.1, 1, 10],
    kernel=['linear', 'rbf', 'poly'],
    gamma=['scale', 'auto'],
)

# Base estimator; probability=True enables predict_proba on the fitted model
svm = SVC(random_state=42, probability=True)

# Sample 10 candidate settings and score each by 5-fold CV accuracy on all cores
random_search = RandomizedSearchCV(
    svm,
    svm_params,
    n_iter=10,
    scoring='accuracy',
    cv=5,
    random_state=42,
    n_jobs=-1,
)

# Run the search on the training split only
random_search.fit(X_train, y_train)

# Keep the winning estimator for evaluation and for saving later
best_svm = random_search.best_estimator_

# Score the winner on the held-out test split
y_pred_svm = best_svm.predict(X_test)
print("SVM Test Accuracy:", accuracy_score(y_test, y_pred_svm))


SVM Test Accuracy: 0.6


In [9]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score

# Random Forest grid search.
#
# This cell used to rely on `RandomForestClassifier` and `rf_params` leaking in
# from cells that are no longer in the notebook, so it raised NameError on a
# fresh kernel (Restart -> Run All). Both are now defined here.
#
# NOTE(review): the original grid was not preserved. The parameter names below
# match the keys of the recorded best-params output (max_depth,
# min_samples_split, n_estimators); the candidate values are an assumption --
# confirm or adjust them before comparing against earlier results.
rf_params = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 5, 10],
    'min_samples_split': [2, 5, 10],
}

rf = RandomForestClassifier(random_state=42)

# Exhaustively evaluate every combination with 5-fold CV accuracy on all cores
grid_search = GridSearchCV(rf, rf_params, cv=5, scoring='accuracy', n_jobs=-1)
grid_search.fit(X_train, y_train)

print("Best hyperparameters:", grid_search.best_params_)
print("Best cross-validated accuracy:", grid_search.best_score_)

# Evaluate the best estimator found by the search on the held-out test split
best_rf = grid_search.best_estimator_
y_pred = best_rf.predict(X_test)
print("Test Accuracy:", accuracy_score(y_test, y_pred))


Best hyperparameters: {'max_depth': 5, 'min_samples_split': 2, 'n_estimators': 50}
Best cross-validated accuracy: 0.5613475177304965
Test Accuracy: 0.7


In [10]:
import joblib
import os

# Create the output directory if needed; exist_ok avoids an error when it already exists
os.makedirs("../models", exist_ok=True)

# Persist each tuned estimator to its own pickle file (random forest first, then SVM)
for _estimator, _target_path in (
    (best_rf, "../models/best_random_forest.pkl"),
    (best_svm, "../models/best_svm.pkl"),
):
    joblib.dump(_estimator, _target_path)

print("Optimized models saved successfully!")


Optimized models saved successfully!


SVM Test Accuracy: 0.6
Random Forest Test Accuracy: 0.6666666666666666
Best RF Hyperparameters: {'max_depth': None, 'max_features': 'sqrt', 'min_samples_leaf': 2, 'min_samples_split': 10, 'n_estimators': 200}
Optimized models saved successfully!
