In [1]:
# train_fraud.ipynb
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import AdaBoostClassifier
from xgboost import XGBClassifier
import os
import sys
sys.path.append(os.environ["DOMINO_WORKING_DIR"])
from domino_short_id import domino_short_id
from exercises.c_TrainingAndEvaluation.generic_trainer import train_fraud


# Name of the transformed transactions CSV. The file is not opened in this
# cell; the name is handed to train_fraud in the training loop.
transformed_df_filename = 'transformed_cc_transactions.csv'

# Single explicit seed for every estimator that accepts one, so the metrics
# logged by a Restart & Run All do not depend on an implicit default.
SEED = 42

# Each candidate is a dict holding the unfitted estimator under 'model' and
# the display label passed to train_fraud under 'name'.
ada_model = {
    'model': AdaBoostClassifier(
        n_estimators=10,
        learning_rate=0.001,
        algorithm="SAMME",
        random_state=SEED,
    ),
    'name': "AdaBoost",
}

# GaussianNB is constructed with its defaults; it takes no seed argument.
gnb_model = {
    'model': GaussianNB(),
    'name': "GaussianNB",
}

xgb_model = {
    'model': XGBClassifier(
        n_estimators=100,
        learning_rate=0.1,
        max_depth=3,
        # Row and column subsampling are random draws, hence the explicit seed.
        subsample=0.8,
        colsample_bytree=0.8,
        tree_method='hist',
        n_jobs=-1,
        # NOTE(review): use_label_encoder is deprecated in recent XGBoost
        # releases — confirm the installed version still needs it.
        use_label_encoder=False,
        eval_metric="auc",
        random_state=SEED,
    ),
    'name': "XGBoost",
}
# Previous (misspelled) name, kept as an alias in case other cells refer to it.
xbg_model = xgb_model

# Order here is the order in which the models are trained.
all_models = [ada_model, gnb_model, xgb_model]

# Train every candidate in list order. train_fraud is given the estimator,
# its display name and the dataset filename; whatever it returns is echoed
# unchanged right after the per-model success line.
for candidate in all_models:
    model_name, model_obj = candidate['name'], candidate['model']
    res = train_fraud(model_obj, model_name, transformed_df_filename)
    print(f"✅ Training {model_name} completed successfully", res, sep="\n")

# Final summary: one check mark per entry in all_models.
print(f"{'✅' * len(all_models)} Trainings completed successfully")



2025/09/24 15:00:48 INFO mlflow.tracking.fluent: Experiment with name 'CC Fraud Classifier Training marc_doan_ei86e-yJ' does not exist. Creating a new experiment.


training model AdaBoost
🏃 View run AdaBoost at: http://127.0.0.1:8765/#/experiments/271/runs/da65ecb98801412399924c2e536492d8
🧪 View experiment at: http://127.0.0.1:8765/#/experiments/271
✅ Training AdaBoost completed successfully
{'schema_version': '1.0', 'model_name': 'AdaBoost', 'model_type': 'AdaBoostClassifier', 'run_id': 'da65ecb98801412399924c2e536492d8', 'artifact_uri': 'mlflow-artifacts:/mlflow/da65ecb98801412399924c2e536492d8/artifacts', 'n_test': 15442, 'pos_rate_test': 0.3619997409661961, 'roc_auc': 0.7898495406433825, 'pr_auc': 0.6560193494750639, 'log_loss': 0.486078380166143, 'brier': 0.15097274773539227, 'ks': 0.5796990812867651, 'ece': 0.06740128913557286, 'calib_slope': 0.7383034617517825, 'calib_intercept': -0.12181309254553763, 'accuracy': 0.8204248154384147, 'precision_fraud': 0.7949738219895288, 'recall_fraud': 0.6790697674418604, 'f1_fraud': 0.7324650265315967, 'tau_default': 0.5, 'tn': 8873, 'fp': 979, 'fn': 1794, 'tp': 3796, 'fit_time_sec': 1.441235065460205, '

training model GaussianNB
🏃 View run GaussianNB at: http://127.0.0.1:8765/#/experiments/271/runs/71b5274c1fd94f11aaeb0c5bbd4d3c04
🧪 View experiment at: http://127.0.0.1:8765/#/experiments/271
✅ Training GaussianNB completed successfully
{'schema_version': '1.0', 'model_name': 'GaussianNB', 'model_type': 'GaussianNB', 'run_id': '71b5274c1fd94f11aaeb0c5bbd4d3c04', 'artifact_uri': 'mlflow-artifacts:/mlflow/71b5274c1fd94f11aaeb0c5bbd4d3c04/artifacts', 'n_test': 15442, 'pos_rate_test': 0.3619997409661961, 'roc_auc': 0.8656243712853633, 'pr_auc': 0.7426804879336285, 'log_loss': 0.9551130992789, 'brier': 0.20843485952594373, 'ks': 0.5783914637893053, 'ece': 0.21027272246457307, 'calib_slope': 0.30461179870705707, 'calib_intercept': 0.4659028107083695, 'accuracy': 0.7480896256961533, 'precision_fraud': 0.7918956043956044, 'recall_fraud': 0.41252236135957066, 'f1_fraud': 0.542460597506469, 'tau_default': 0.5, 'tn': 9246, 'fp': 606, 'fn': 3284, 'tp': 2306, 'fit_time_sec': 0.035607099533081055, '

training model XGBoost
🏃 View run XGBoost at: http://127.0.0.1:8765/#/experiments/271/runs/e9087cf4121040fcbcf968e1c50b7ad5
🧪 View experiment at: http://127.0.0.1:8765/#/experiments/271
✅ Training XGBoost completed successfully
{'schema_version': '1.0', 'model_name': 'XGBoost', 'model_type': 'XGBClassifier', 'run_id': 'e9087cf4121040fcbcf968e1c50b7ad5', 'artifact_uri': 'mlflow-artifacts:/mlflow/e9087cf4121040fcbcf968e1c50b7ad5/artifacts', 'n_test': 15442, 'pos_rate_test': 0.3619997409661961, 'roc_auc': 0.9329594365118967, 'pr_auc': 0.8949575858090237, 'log_loss': 0.30374596931088493, 'brier': 0.09431648326862296, 'ks': 0.7254666379046742, 'ece': 0.0195719683117452, 'calib_slope': 1.1313031461774614, 'calib_intercept': 0.040934059761775776, 'accuracy': 0.8507317704960498, 'precision_fraud': 0.8063794068270845, 'recall_fraud': 0.773345259391771, 'f1_fraud': 0.7895169390923203, 'tau_default': 0.5, 'tn': 8814, 'fp': 1038, 'fn': 1267, 'tp': 4323, 'fit_time_sec': 0.8209407329559326, 'predict