In [1]:
import argparse
import mlflow
from fraud_trainer import load_data, train_and_log
from helpers.domino_short_id import domino_short_id
from sklearn.ensemble import AdaBoostClassifier
from xgboost import XGBClassifier
from sklearn.naive_bayes import GaussianNB


def train_fraud(
    clean_filename='preprocessing_processed_cc_transactions.csv',
    experiment_name=None,
    random_state=None,
):
    """Train the candidate fraud classifiers and log each one to MLflow.

    The data is loaded once through ``load_data``; an XGBoost, an AdaBoost
    and a Gaussian naive Bayes classifier are then built and each is handed,
    together with the loaded data, to ``train_and_log``.

    Args:
        clean_filename: CSV file name forwarded to ``load_data`` and
            ``train_and_log``.
        experiment_name: Name of the MLflow experiment to activate. When
            ``None`` the name ``"CC Fraud Classifier Training <id>"`` is
            used, where ``<id>`` is the value of ``domino_short_id()``.
        random_state: Seed forwarded to ``load_data``, ``XGBClassifier`` and
            ``AdaBoostClassifier``. ``None`` (the default) leaves them
            unseeded, so results are presumably not reproducible run to run.

    Returns:
        None. All results are emitted by ``train_and_log``.
    """
    if experiment_name is None:
        # Resolved at call time so every call picks up the current short id.
        experiment_name = f"CC Fraud Classifier Training {domino_short_id()}"

    # Set up experiment
    mlflow.set_experiment(experiment_name)

    # Load data once so every model sees the same train/validation data.
    df, X_train, X_val, y_train, y_val, features = load_data(
        clean_filename,
        random_state=random_state
    )

    # NOTE(review): `use_label_encoder` is deprecated in recent XGBoost
    # releases -- confirm against the installed version before removing it.
    xgb = XGBClassifier(
        n_estimators=200,
        learning_rate=0.05,
        max_depth=4,
        subsample=0.8,
        colsample_bytree=0.8,
        use_label_encoder=False,
        eval_metric="auc",
        random_state=random_state
    )

    ada = AdaBoostClassifier(
        n_estimators=10,
        learning_rate=0.1,
        random_state=random_state
    )

    gnb = GaussianNB()

    # (estimator, display name) pairs; the name is passed to train_and_log.
    candidates = [
        (xgb, 'XGBoost'),
        (ada, 'AdaBoost'),
        (gnb, 'NaiveBayes')
    ]

    for estimator, name in candidates:
        print(f'training model {name}')
        train_and_log(
            estimator, name,
            df, X_train, X_val, y_train, y_val,
            features, clean_filename
        )

# Notebook driver: run the full training pass once. Raise the range bound to
# repeat the pass back to back.
for _run in range(1):
    train_fraud()


training model XGBoost


Registered model 'CC Fraud XGBoost Classifier' already exists. Creating a new version of this model...
2025/07/01 16:20:05 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: CC Fraud XGBoost Classifier, version 107
Created version '107' of model 'CC Fraud XGBoost Classifier'.


🏃 View run XGBoost at: http://127.0.0.1:8768/#/experiments/1566/runs/80a347f6487f4bc3bf920333a3eb4755
🧪 View experiment at: http://127.0.0.1:8768/#/experiments/1566
training model AdaBoost


Registered model 'CC Fraud AdaBoost Classifier' already exists. Creating a new version of this model...
2025/07/01 16:20:14 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: CC Fraud AdaBoost Classifier, version 113
Created version '113' of model 'CC Fraud AdaBoost Classifier'.


🏃 View run AdaBoost at: http://127.0.0.1:8768/#/experiments/1566/runs/ecbe1afba6764ee6b0fa37ae83f23e0f
🧪 View experiment at: http://127.0.0.1:8768/#/experiments/1566
training model NaiveBayes


Registered model 'CC Fraud NaiveBayes Classifier' already exists. Creating a new version of this model...
2025/07/01 16:20:21 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: CC Fraud NaiveBayes Classifier, version 48
Created version '48' of model 'CC Fraud NaiveBayes Classifier'.


🏃 View run NaiveBayes at: http://127.0.0.1:8768/#/experiments/1566/runs/a5a46557415248cbb3e988cb9c78745f
🧪 View experiment at: http://127.0.0.1:8768/#/experiments/1566
