In [11]:
import sys
from pathlib import Path  # was missing: Path is used below and raised NameError on a fresh kernel

import numpy as np
import pandas as pd

# Resolve the parent of the current working directory to an absolute path;
# it is used as the base for both the `src` directory and the data file.
PROJECT_ROOT = Path('..').resolve()

# Make modules under <PROJECT_ROOT>/src importable. The membership check keeps
# the cell idempotent: re-running it does not append duplicate sys.path entries.
SRC_DIR = str(PROJECT_ROOT / 'src')
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)

# Project-local module; must come after the sys.path tweak above.
from preprocess import load_and_preprocess, DEFAULT_TARGET_COL

# load_and_preprocess returns four values, unpacked here as the frame, the
# feature matrix, the target and the feature column names.
# NOTE(review): exact types/contents depend on preprocess.py — confirm there.
df, X, y, feature_cols = load_and_preprocess(
    PROJECT_ROOT / 'data' / 'heart.csv',
    target_col=DEFAULT_TARGET_COL,
)

In [12]:
# Sanity check: report the dimensions of the feature matrix loaded above.
print('X shape:', X.shape)

X shape: (303, 16)


In [14]:
from sklearn.model_selection import StratifiedKFold

# Shared evaluation protocol reused by the grid-search cells: one metric and
# one shuffled, stratified 5-fold splitter with a fixed seed so the fold
# assignment is reproducible.
SCORING = 'roc_auc'
cv = StratifiedKFold(
    n_splits=5,
    shuffle=True,
    random_state=42,
)

In [15]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

# Scaler and classifier are bundled in one pipeline, so the grid search
# re-fits the scaler together with the model on each training split.
logreg_pipe = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('model', LogisticRegression(max_iter=5000)),
])

# Only the regularisation strength C is actually searched; penalty and solver
# are single-valued entries that pin the configuration explicitly.
param_grid_logreg = dict(
    model__C=[0.01, 0.1, 1.0, 10.0, 100.0],
    model__penalty=['l2'],
    model__solver=['lbfgs'],
)

# fit() returns the fitted search object, so construction and fitting chain.
gs_logreg = GridSearchCV(
    logreg_pipe,
    param_grid_logreg,
    scoring=SCORING,
    cv=cv,
    n_jobs=-1,
).fit(X, y)

print('Best ROC-AUC:', gs_logreg.best_score_)
print('Best params:', gs_logreg.best_params_)



Best ROC-AUC: 0.8921837421837422
Best params: {'model__C': 0.01, 'model__penalty': 'l2', 'model__solver': 'lbfgs'}


In [16]:
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.model_selection import GridSearchCV

# Seeded gradient-boosting baseline; it is used without a scaling step.
hgb = HistGradientBoostingClassifier(random_state=42)

# 3 learning rates x 4 depths x 3 iteration budgets = 36 candidate settings.
param_grid_hgb = dict(
    learning_rate=[0.01, 0.05, 0.1],
    max_depth=[2, 3, 5, None],
    max_iter=[200, 500, 1000],
)

# Same metric and CV splitter as the logistic-regression search, so the two
# best scores are computed under the same protocol.
gs_hgb = GridSearchCV(
    hgb,
    param_grid_hgb,
    scoring=SCORING,
    cv=cv,
    n_jobs=-1,
).fit(X, y)

print('Best ROC-AUC:', gs_hgb.best_score_)
print('Best params:', gs_hgb.best_params_)



Best ROC-AUC: 0.906048581048581
Best params: {'learning_rate': 0.01, 'max_depth': 3, 'max_iter': 200}


In [17]:
# Best estimator found by each grid search.
best_logreg = gs_logreg.best_estimator_
best_hgb = gs_hgb.best_estimator_

print('Tuned LogReg ROC-AUC:', gs_logreg.best_score_)
print('Tuned HistGB ROC-AUC:', gs_hgb.best_score_)

# Pick the model with the higher mean CV score; a tie goes to HistGB.
if gs_hgb.best_score_ >= gs_logreg.best_score_:
    winner, winner_name = best_hgb, 'HistGB'
else:
    winner, winner_name = best_logreg, 'LogReg'

print('Winner:', winner_name)



Tuned LogReg ROC-AUC: 0.8921837421837422
Tuned HistGB ROC-AUC: 0.906048581048581
Winner: HistGB
