In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the dataset and separate the feature columns from the "target" label.
df = pd.read_csv("D:/Coding/Python/Heart_Disease_Project/heart+disease/heart_disease.csv")
X = df.drop("target", axis=1)
y = df["target"]

# Split BEFORE scaling: fitting the scaler on the full dataset would let the
# test rows influence the mean/std applied to the training rows (data leakage)
# and make the cross-validation and test scores optimistic.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to the held-out rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept so any later cell that still refers to X_scaled keeps working; it is the
# full feature matrix transformed with the train-fitted scaler.
X_scaled = scaler.transform(X)

In [2]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

# Base estimator; the fixed seed keeps the forest reproducible across runs.
rf = RandomForestClassifier(random_state=42)

# Candidate values tried exhaustively (3 * 4 * 3 = 36 combinations).
param_grid = dict(
    n_estimators=[50, 100, 200],
    max_depth=[4, 6, 10, None],
    min_samples_split=[2, 5, 10],
)

# 5-fold cross-validated grid search scored on accuracy, run in parallel.
grid_search = GridSearchCV(
    estimator=rf,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

# Report the winning combination and its mean cross-validated accuracy.
print("Best Parameters: ", grid_search.best_params_)
print("Best Accuracy: ", grid_search.best_score_)

Best Parameters:  {'max_depth': 4, 'min_samples_split': 10, 'n_estimators': 50}
Best Accuracy:  0.8432624113475178


In [3]:
from sklearn.svm import SVC
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import uniform

# probability=True makes SVC fit an internal cross-validated calibration that
# draws random numbers; seed it so predict_proba is reproducible, in line with
# the seeded random forest and the seeded search below.
svc = SVC(probability=True, random_state=42)

# Search space for the randomized search.
# NOTE: scipy's uniform(loc, scale) covers [loc, loc + scale], so C is sampled
# from [0.1, 10.1], not [0.1, 10].
# NOTE: 'gamma' is ignored by the linear kernel, so linear candidates that
# differ only in gamma are effectively duplicates.
param_dist = {
    'C': uniform(0.1, 10),
    'gamma': ['scale', 'auto'],
    'kernel': ['linear', 'rbf', 'poly']
}

# 20 random candidates, each evaluated with 5-fold cross-validated accuracy.
random_search = RandomizedSearchCV(svc, param_distributions=param_dist, n_iter=20,
                                   scoring='accuracy', cv=5, random_state=42, n_jobs=-1)
random_search.fit(X_train, y_train)

print("Best Parameters: ", random_search.best_params_)
print("Best Accuracy: ", random_search.best_score_)

Best Parameters:  {'C': np.float64(7.41993941811405), 'gamma': 'scale', 'kernel': 'linear'}
Best Accuracy:  0.826418439716312


In [4]:
from sklearn.metrics import classification_report

# Pull the refitted winners out of both hyper-parameter searches.
rf_best = grid_search.best_estimator_
svm_best = random_search.best_estimator_

# Predict on the held-out split with each tuned model.
y_pred_rf = rf_best.predict(X_test)
y_pred_svm = svm_best.predict(X_test)

# Print one per-class precision/recall/F1 report per model, forest first.
for heading, predictions in (
    ("Random Forest - performance after modification: ", y_pred_rf),
    ("SVM - performance after modification: ", y_pred_svm),
):
    print(heading)
    print(classification_report(y_test, predictions))

Random Forest - performance after modification: 
              precision    recall  f1-score   support

           0       0.81      0.94      0.87        32
           1       0.91      0.75      0.82        28

    accuracy                           0.85        60
   macro avg       0.86      0.84      0.85        60
weighted avg       0.86      0.85      0.85        60

SVM - performance after modification: 
              precision    recall  f1-score   support

           0       0.83      0.91      0.87        32
           1       0.88      0.79      0.83        28

    accuracy                           0.85        60
   macro avg       0.85      0.85      0.85        60
weighted avg       0.85      0.85      0.85        60



In [9]:
# Save Pipeline #
import joblib
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
import os

# Single source of truth for the output directory. Forward slashes are used on
# purpose: the previous "D:\Coding\..." literal relied on invalid escape
# sequences (\C, \P, \H, \m), which newer Python versions warn about and which
# break silently if a folder name ever starts with a valid escape such as \n or \t.
MODEL_DIR = "D:/Coding/Python/Heart_Disease_Project/models"

# Final deployable pipeline: scaling is part of the pipeline, so it is fitted on
# the raw (unscaled) features and can be applied directly to raw inputs later.
# The classifier reuses the hyper-parameters selected by the randomized search
# instead of a hard-coded, untuned SVC, so the saved model matches the one that
# was evaluated above.
final_model = Pipeline([
    ('scaler', StandardScaler()),
    ('classifier', SVC(**random_search.best_params_, probability=True, random_state=42))
])

# Refit on every available row before persisting.
final_model.fit(X, y)

os.makedirs(MODEL_DIR, exist_ok=True)
joblib.dump(final_model, f"{MODEL_DIR}/final_model.pkl")

print("The model has been successfully saved")

The model has been successfully saved
