In [None]:
from train import CreditRiskTrainer
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
import pandas as pd


# 1. Read the processed dataset from disk
df = pd.read_csv("data/processed/processed_data.csv")

# The "FraudResult" column is the label; every remaining column is passed to
# the trainer as a feature.
y = df["FraudResult"]
X = df.drop(columns=["FraudResult"])

# 2. Build the trainer from the feature matrix and the label series
trainer = CreditRiskTrainer(X, y)

# 3. Add models
# Seed shared by every estimator below. Without it the random forest's
# bootstrap/feature sampling changes on each run, so the reported metrics and
# the model picked as "best" are not reproducible.
# NOTE(review): any train/test split or CV shuffling done inside
# CreditRiskTrainer is not visible here and may need its own seed -- confirm.
RANDOM_STATE = 42

# Random forest, registered together with a small grid over the number of
# trees and the maximum tree depth.
trainer.add_model(
    name="RandomForest",
    model=RandomForestClassifier(random_state=RANDOM_STATE),
    param_grid={
        "n_estimators": [100, 200],
        "max_depth": [5, 10],
    },
)

# XGBoost with the histogram tree method and log-loss as evaluation metric.
# No parameter grid is supplied for this model.
trainer.add_model(
    name="XGBoost",
    model=xgb.XGBClassifier(
        eval_metric="logloss",
        tree_method="hist",
        random_state=RANDOM_STATE,
    ),
    param_grid=None,  # No tuning
)

# 4. Train and evaluate all models
# Runs the trainer over every model registered via add_model() above.
# NOTE(review): presumably models with a param_grid are tuned and the others
# are fitted as-is -- confirm against CreditRiskTrainer.train_and_tune.
trainer.train_and_tune()

# 5. Log results + models into MLflow
# Must come after training; the MLflow tracking location is not configured
# in this script, so it is whatever the trainer / environment provides.
trainer.log_experiments()

# 6. Save best performing model locally (for prediction)
# The target is a relative path, so where the artifact lands presumably
# depends on the working directory the script/notebook is run from.
trainer.save_best_model_locally("notebooks/models/best_model")

# 7. Register best model in MLflow registry
# NOTE: this call is unconditional -- comment it out to skip registration.
trainer.register_best_model()