In [None]:
import os

import joblib
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, roc_auc_score
from xgboost import XGBClassifier

# Load the preprocessed train/test arrays written by an earlier step.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code — only load artifacts produced by this project's own pipeline.
preprocessed_split = joblib.load("../data/processed/preprocessed.pkl")
X_train, X_test, y_train, y_test = preprocessed_split


In [None]:
# Single seed shared by every estimator so that Restart & Run All reproduces
# the same fitted models (and therefore the same metrics and saved artifacts)
# instead of depending on each library's default seeding behaviour.
RANDOM_STATE = 42

# Candidate classifiers, keyed by the display name used in reports and
# (after normalisation) in the saved file names.
models = {
    # random_state is passed here for uniformity; it guards against the solver
    # being switched to a stochastic one later.
    "Logistic Regression": LogisticRegression(
        max_iter=1000, random_state=RANDOM_STATE
    ),
    # Random Forest training is randomised, so it must be seeded explicitly.
    "Random Forest": RandomForestClassifier(
        n_estimators=150, random_state=RANDOM_STATE
    ),
    # subsample=0.9 makes each boosting round draw a random subset of rows.
    "XGBoost": XGBClassifier(
        n_estimators=200,
        max_depth=4,
        learning_rate=0.1,
        subsample=0.9,
        random_state=RANDOM_STATE,
    ),
}

# Fitted estimators, keyed by the same display names as `models`.
trained_models = {}

for name, model in models.items():
    print(f"\nTraining {name} ...")
    # fit() trains the estimator in place; the same object is stored below.
    model.fit(X_train, y_train)
    trained_models[name] = model


In [None]:
# Report test-set metrics for every fitted model: a banner with the model
# name, the classification report, then the ROC AUC.
for name, model in trained_models.items():
    preds = model.predict(X_test)
    # Column 1 of predict_proba is used as the ranking score for the AUC
    # (presumably the positive class of a binary target — TODO confirm).
    proba = model.predict_proba(X_test)[:, 1]

    print(
        "\n==============================",
        name,
        "==============================",
        sep="\n",
    )
    report = classification_report(y_test, preds)
    print(report)
    auc = roc_auc_score(y_test, proba)
    print(f"AUC: {auc}")


In [None]:
# Persist each fitted model as ../models/<display_name_in_snake_case>.joblib.
models_dir = "../models"
# joblib.dump does not create missing parent directories, so on a fresh
# checkout the first dump would fail with FileNotFoundError; create the
# output directory up front (a no-op when it already exists).
os.makedirs(models_dir, exist_ok=True)

for name, model in trained_models.items():
    # "Random Forest" -> "random_forest": spaces to underscores, lower-cased.
    filename = f"{models_dir}/{name.replace(' ', '_').lower()}.joblib"
    joblib.dump(model, filename)
    print("Saved:", filename)
