In [1]:
import os
from dotenv import load_dotenv
import mlflow
import pathlib

# Load variables from a local .env file; override=True lets them replace values already set in the environment.
load_dotenv(override=True)

# Experiment path handed to mlflow.set_experiment below and reused by later cells.
EXPERIMENT_NAME = "/Users/isabel.valladolid@iteso.mx/nyc-taxi-experiments"
# Use the "databricks" tracking URI.
# NOTE(review): presumably the Databricks host/token come from the .env loaded above — confirm.
mlflow.set_tracking_uri("databricks")  
mlflow.set_experiment(EXPERIMENT_NAME)

<Experiment: artifact_location='dbfs:/databricks/mlflow-tracking/1858882611825285', creation_time=1761102889708, experiment_id='1858882611825285', last_update_time=1761699138796, lifecycle_stage='active', name='/Users/isabel.valladolid@iteso.mx/nyc-taxi-experiments', tags={'mlflow.experiment.sourceName': '/Users/isabel.valladolid@iteso.mx/nyc-taxi-experiments',
 'mlflow.experimentType': 'MLFLOW_EXPERIMENT',
 'mlflow.ownerEmail': 'isabel.valladolid@iteso.mx',
 'mlflow.ownerId': '72142664924861'}>

#### Descargar en la carpeta data el conjunto de datos correspondiente a marzo del 2025

In [2]:
import requests
import pathlib

# Folder where the monthly parquet files are stored (created on first run).
DATA_DIR = pathlib.Path("../data")
DATA_DIR.mkdir(parents=True, exist_ok=True)

# The download URL can be overridden through the GREEN_2025_03_URL environment variable
# (the one the message below refers to); otherwise the public March 2025 file is used.
URL = os.getenv(
    "GREEN_2025_03_URL",
    "https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2025-03.parquet",
)
TARGET_PATH = os.path.join(DATA_DIR, 'green_tripdata_2025-03.parquet')

if os.path.exists(TARGET_PATH):
    print(f"El archivo ya existe en {TARGET_PATH}")
elif not URL:
    print("No se proporcionó DOWNLOAD_URL. Por favor coloca la URL del parquet en la variable de entorno GREEN_2025_03_URL o descarga manualmente a data/.")
else:
    print(f"Descargando {URL} -> {TARGET_PATH} ...")
    # Stream into a temporary ".part" file and rename it only after the download succeeds,
    # so an interrupted transfer never leaves a truncated parquet that the existence check
    # above would later mistake for a complete file.
    partial_path = TARGET_PATH + ".part"
    try:
        # timeout=(connect, read) in seconds keeps the cell from hanging forever on a stalled connection.
        with requests.get(URL, stream=True, timeout=(10, 120)) as response:
            response.raise_for_status()
            with open(partial_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    f.write(chunk)
        os.replace(partial_path, TARGET_PATH)
    except Exception:
        # Remove the partial file before re-raising so the next run starts clean.
        if os.path.exists(partial_path):
            os.remove(partial_path)
        raise
    print("Descarga completa.")

El archivo ya existe en ../data/green_tripdata_2025-03.parquet


#### Leer y preprocesar los datos

In [3]:
import pandas as pd
from sklearn.feature_extraction import DictVectorizer
from sklearn.metrics import root_mean_squared_error

def read_dataframe(path):
    """Load a green-taxi parquet file and prepare it for modelling.

    Adds a ``duration`` column (drop-off minus pick-up, in minutes), keeps only
    trips whose duration is between 1 and 60 minutes (both inclusive) and casts
    ``PULocationID`` / ``DOLocationID`` to strings.

    Parameters
    ----------
    path : str or path-like
        Location of the parquet file, passed to ``pd.read_parquet``.

    Returns
    -------
    pandas.DataFrame
        An independent frame (not a view of the raw data) with the filtered trips.
    """
    df = pd.read_parquet(path)
    df["duration"] = (df.lpep_dropoff_datetime - df.lpep_pickup_datetime).dt.total_seconds() / 60

    # Explicit copy: the column assignment below must not operate on a slice of the
    # unfiltered frame (chained assignment / SettingWithCopyWarning).
    df = df.loc[df["duration"].between(1, 60)].copy()

    location_cols = ["PULocationID", "DOLocationID"]
    df[location_cols] = df[location_cols].astype(str)
    return df

# January 2025 is used as the training split and February 2025 as the validation split.
# NOTE(review): the download cell above only fetches the March file; these two files are
# assumed to already exist in ../data — confirm before a fresh run.
df_train = read_dataframe("../data/green_tripdata_2025-01.parquet")
df_val = read_dataframe("../data/green_tripdata_2025-02.parquet")

In [4]:
# Feature Engineering + One Hot Encoding

def preprocess(df, dv):
    """Turn a trips DataFrame into the feature matrix expected by the models.

    Builds the combined pick-up/drop-off feature ``PU_DO`` (``"<PU>_<DO>"``) and
    encodes it together with ``trip_distance`` using an already fitted vectorizer.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``PULocationID`` and ``DOLocationID`` as strings and
        ``trip_distance``. The frame is NOT modified: ``PU_DO`` is built on a
        separate frame so callers do not get a surprise extra column.
    dv : object with a ``transform`` method
        Fitted vectorizer (a ``DictVectorizer`` in this notebook).

    Returns
    -------
    Whatever ``dv.transform`` returns for the list of feature dicts.
    """
    categorical = ['PU_DO']
    numerical = ['trip_distance']

    # Work on a small derived frame instead of writing PU_DO into the caller's DataFrame.
    features = df[numerical].assign(PU_DO=df['PULocationID'] + '_' + df['DOLocationID'])
    records = features[categorical + numerical].to_dict(orient='records')
    return dv.transform(records)

# Fit the vectorizer on the training split only; validation reuses the fitted vocabulary.
dv = DictVectorizer()
df_train = df_train.copy()
df_train['PU_DO'] = df_train['PULocationID'] + '_' + df_train['DOLocationID']
X_train = dv.fit_transform(df_train[['PU_DO','trip_distance']].to_dict(orient='records'))

X_val = preprocess(df_val, dv)
target = 'duration'
y_train = df_train[target].values
y_val   = df_val[target].values

# Optional MLflow dataset objects.
# They are built from the source DataFrames rather than from the encoded matrices:
# X_train / X_val are scipy sparse matrices, whose `.data` attribute is the flat 1-D array
# of stored non-zero values (not a rows x features matrix), so it does not line up with the
# targets and cannot describe the dataset correctly.
dataset_columns = ['PULocationID', 'DOLocationID', 'trip_distance', target]
training_dataset   = mlflow.data.from_pandas(df_train[dataset_columns], targets=target, name="green_tripdata_2025-01")
validation_dataset = mlflow.data.from_pandas(df_val[dataset_columns], targets=target, name="green_tripdata_2025-02")

X_train.shape, X_val.shape

((46307, 4159), (44218, 4159))

#### Primer parent experiment: **Gradient Boost (XGBoost) con Optuna**

In [5]:
import xgboost as xgb
import optuna
import pickle
from optuna.samplers import TPESampler
from mlflow.models.signature import infer_signature
from sklearn.metrics import mean_squared_error
import numpy as np

In [6]:
PARENT_XGB = "XGBoost_Parent_Experiment"

if X_train is not None:
    dtrain = xgb.DMatrix(X_train, label=y_train)
    dvalid = xgb.DMatrix(X_val, label=y_val)

    # Dense 5-row sample used as input example and signature source for every trial model.
    # It does not depend on the trial, so it is built once instead of twice per trial.
    trial_input_example = X_val[:5].toarray() if hasattr(X_val, 'toarray') else X_val[:5]

    def train_booster(params):
        """Train on dtrain for up to 200 rounds with early stopping (10 rounds) evaluated on dvalid."""
        return xgb.train(
            params=params,
            dtrain=dtrain,
            num_boost_round=200,
            evals=[(dvalid, 'validation')],
            early_stopping_rounds=10,
            verbose_eval=False
        )

    def objective(trial: optuna.trial.Trial):
        """Optuna objective: train one XGBoost model inside a nested MLflow run.

        Samples the hyperparameters, logs them together with the validation RMSE
        and the trained model, and returns the RMSE (the value Optuna minimises).
        """
        params = {
            'max_depth': trial.suggest_int('max_depth', 4, 20),
            'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
            'reg_alpha': trial.suggest_float('reg_alpha', 1e-8, 10.0, log=True),
            'reg_lambda': trial.suggest_float('reg_lambda', 1e-8, 10.0, log=True),
            'min_child_weight': trial.suggest_float('min_child_weight', 1e-3, 10.0, log=True),
            'objective': 'reg:squarederror',
            'seed': 42
        }

        with mlflow.start_run(run_name='xgboost_optuna_trial', nested=True):
            mlflow.set_tag('model_family', 'xgboost')
            mlflow.log_params(params)

            booster = train_booster(params)

            y_pred = booster.predict(dvalid)
            rmse = np.sqrt(mean_squared_error(y_val, y_pred))
            mlflow.log_metric('rmse', rmse)

            # Store the trial model with a signature inferred from the dense sample.
            signature = infer_signature(trial_input_example, y_pred[:5])
            mlflow.xgboost.log_model(booster, artifact_path='model', input_example=trial_input_example, signature=signature)

        return rmse

    sampler = TPESampler(seed=42)
    study = optuna.create_study(direction='minimize', sampler=sampler)

    with mlflow.start_run(run_name='XGBoost Hyperparameter Optimization Parent', nested=False):
        # Tag the parent like its child trials so runs can be filtered by model family.
        mlflow.set_tag('model_family', 'xgboost')
        study.optimize(objective, n_trials=5)

        best_params = study.best_params
        best_params['seed'] = 42
        best_params['objective'] = 'reg:squarederror'
        # Record the winning hyperparameters on the parent run; without them the parent's
        # RMSE and model cannot be traced back to the configuration that produced them.
        mlflow.log_params(best_params)

        # Retrain the final model with the best hyperparameters.
        booster = train_booster(best_params)

        y_pred = booster.predict(dvalid)
        rmse = np.sqrt(mean_squared_error(y_val, y_pred))
        mlflow.log_metric('rmse', rmse)

        # Persist the fitted vectorizer next to the model so inference can rebuild the features.
        pathlib.Path('preprocessor').mkdir(exist_ok=True)
        with open('preprocessor/dv_xgb.b', 'wb') as f_out:
            pickle.dump(dv, f_out)
        mlflow.log_artifact('preprocessor/dv_xgb.b', artifact_path='preprocessor')

        # Log the final model with a column-named input example built from the vectorizer's feature names.
        feature_names = dv.get_feature_names_out()
        input_example = pd.DataFrame(X_val[:5].toarray(), columns=feature_names)
        signature = infer_signature(input_example, y_pred[:5])
        mlflow.xgboost.log_model(booster, artifact_path='model', input_example=input_example, signature=signature, registered_model_name=None)

[I 2025-10-28 19:07:08,127] A new study created in memory with name: no-name-03399315-e23c-4710-b662-354bfc74c519
  xgb_model.save_model(model_data_path)
  model.load_model(xgb_model_path)
2025/10/28 19:07:16 INFO mlflow.models.model: Found the following environment variables used during model inference: [DATABRICKS_HOST, DATABRICKS_TOKEN]. Please check if you need to set them when deploying the model. To disable this message, set environment variable `MLFLOW_RECORD_ENV_VARS_IN_MODEL_LOGGING` to `false`.


🏃 View run xgboost_optuna_trial at: https://dbc-00d2933e-633b.cloud.databricks.com/ml/experiments/1858882611825285/runs/7a0eb5441f7b4b199959cb5ebb44bcc8
🧪 View experiment at: https://dbc-00d2933e-633b.cloud.databricks.com/ml/experiments/1858882611825285


[I 2025-10-28 19:07:25,363] Trial 0 finished with value: 5.43449809048156 and parameters: {'max_depth': 10, 'learning_rate': 0.2536999076681771, 'reg_alpha': 0.03872090295370417, 'reg_lambda': 0.0024430162614261434, 'min_child_weight': 0.004207988669606638}. Best is trial 0 with value: 5.43449809048156.
  xgb_model.save_model(model_data_path)
  model.load_model(xgb_model_path)


🏃 View run xgboost_optuna_trial at: https://dbc-00d2933e-633b.cloud.databricks.com/ml/experiments/1858882611825285/runs/50814dde9c9743f9a003073749cfd18e
🧪 View experiment at: https://dbc-00d2933e-633b.cloud.databricks.com/ml/experiments/1858882611825285


[I 2025-10-28 19:07:37,943] Trial 1 finished with value: 5.692282028786779 and parameters: {'max_depth': 6, 'learning_rate': 0.012184186502221764, 'reg_alpha': 0.6245760287469887, 'reg_lambda': 0.002570603566117596, 'min_child_weight': 0.6796578090758157}. Best is trial 0 with value: 5.43449809048156.
  xgb_model.save_model(model_data_path)
  model.load_model(xgb_model_path)


🏃 View run xgboost_optuna_trial at: https://dbc-00d2933e-633b.cloud.databricks.com/ml/experiments/1858882611825285/runs/182f34f2311248ab8f86888e6800209d
🧪 View experiment at: https://dbc-00d2933e-633b.cloud.databricks.com/ml/experiments/1858882611825285


[I 2025-10-28 19:07:48,115] Trial 2 finished with value: 5.532208825182618 and parameters: {'max_depth': 4, 'learning_rate': 0.27081608642499677, 'reg_alpha': 0.310444354994832, 'reg_lambda': 8.148018307012941e-07, 'min_child_weight': 0.005337032762603957}. Best is trial 0 with value: 5.43449809048156.
  xgb_model.save_model(model_data_path)
  model.load_model(xgb_model_path)


🏃 View run xgboost_optuna_trial at: https://dbc-00d2933e-633b.cloud.databricks.com/ml/experiments/1858882611825285/runs/107190bd33774008a71d6d579d207e10
🧪 View experiment at: https://dbc-00d2933e-633b.cloud.databricks.com/ml/experiments/1858882611825285


[I 2025-10-28 19:07:57,741] Trial 3 finished with value: 5.589369956664044 and parameters: {'max_depth': 7, 'learning_rate': 0.028145092716060652, 'reg_alpha': 0.00052821153945323, 'reg_lambda': 7.71800699380605e-05, 'min_child_weight': 0.014618962793704969}. Best is trial 0 with value: 5.43449809048156.
  xgb_model.save_model(model_data_path)
  model.load_model(xgb_model_path)


🏃 View run xgboost_optuna_trial at: https://dbc-00d2933e-633b.cloud.databricks.com/ml/experiments/1858882611825285/runs/bd03e91fc1344b31a8771e67eeea5275
🧪 View experiment at: https://dbc-00d2933e-633b.cloud.databricks.com/ml/experiments/1858882611825285


[I 2025-10-28 19:08:34,208] Trial 4 finished with value: 5.541143092081019 and parameters: {'max_depth': 14, 'learning_rate': 0.01607123851203988, 'reg_alpha': 4.258943089524393e-06, 'reg_lambda': 1.9826980964985924e-05, 'min_child_weight': 0.06672367170464209}. Best is trial 0 with value: 5.43449809048156.
  xgb_model.save_model(model_data_path)
  model.load_model(xgb_model_path)


🏃 View run XGBoost Hyperparameter Optimization Parent at: https://dbc-00d2933e-633b.cloud.databricks.com/ml/experiments/1858882611825285/runs/10d1adaab013499eaab70f1583a39cd6
🧪 View experiment at: https://dbc-00d2933e-633b.cloud.databricks.com/ml/experiments/1858882611825285


#### Segundo parent experiment: **Random Forest con Randomized Search (sklearn)**

In [7]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV

In [8]:
PARENT_RF = 'RandomForest_Parent_Experiment'

if X_train is not None:
    rf = RandomForestRegressor(random_state=42, n_jobs=-1)
    # Search space sampled by RandomizedSearchCV (6 of the 36 combinations, 3-fold CV).
    param_dist = {
        'n_estimators': [50, 100, 200],
        'max_depth': [None, 10, 20, 30],
        'min_samples_split': [2, 5, 10]
    }

    with mlflow.start_run(run_name='RandomForest Parent', nested=False):
        mlflow.set_tag('model_family', 'random_forest')
        search = RandomizedSearchCV(rf, param_distributions=param_dist, n_iter=6, cv=3, scoring='neg_root_mean_squared_error', n_jobs=-1, random_state=42)
        search.fit(X_train, y_train)

        # Evaluate the refit best estimator on the validation split.
        best_rf = search.best_estimator_
        y_pred = best_rf.predict(X_val)
        rmse = np.sqrt(mean_squared_error(y_val, y_pred))
        mlflow.log_params(search.best_params_)
        mlflow.log_metric('rmse', rmse)

        # Save the preprocessor. The directory is created here as well so this cell does not
        # depend on the XGBoost cell having been executed first.
        pathlib.Path('preprocessor').mkdir(exist_ok=True)
        with open('preprocessor/dv_rf.b', 'wb') as f_out:
            pickle.dump(dv, f_out)
        mlflow.log_artifact('preprocessor/dv_rf.b', artifact_path='preprocessor')

        # Log the sklearn model with a column-named input example.
        mlflow.sklearn.log_model(best_rf, artifact_path='model', input_example=pd.DataFrame(X_val[:5].toarray(), columns=dv.get_feature_names_out()))



🏃 View run RandomForest Parent at: https://dbc-00d2933e-633b.cloud.databricks.com/ml/experiments/1858882611825285/runs/bba9b771a9f0407faa9bf3085d32ee40
🧪 View experiment at: https://dbc-00d2933e-633b.cloud.databricks.com/ml/experiments/1858882611825285


#### Registrar el mejor modelo de los experimentos como 'nyc-taxi-model' y asignar alias 'challenger'

In [11]:
def register_best_as_challenger(experiment_name, model_name):
    """Register the best run of an experiment as the 'challenger' model version.

    Searches the runs of ``experiment_name``, picks the one with the lowest
    ``rmse`` metric, registers its ``model`` artifact as a new version of
    ``model_name`` and tries to give that version the alias ``challenger``.
    If the alias call fails, the version is tagged ``alias=challenger`` instead.

    Parameters
    ----------
    experiment_name : str
        Name of the MLflow experiment to search.
    model_name : str
        Registered model name that receives the new version.

    Returns
    -------
    dict or None
        ``{"model_name": ..., "version": ...}``, or ``None`` when the experiment
        has no run with an ``rmse`` metric.
    """
    import mlflow
    from mlflow import MlflowClient
    from datetime import datetime

    client = MlflowClient()

    runs = mlflow.search_runs(experiment_names=[experiment_name], order_by=["metrics.rmse ASC"], output_format="list")
    if not runs:
        print("No se encontraron runs para el experimento:", experiment_name)
        return None

    # Runs that never logged `rmse` (e.g. bookkeeping runs) must not be selected: they have
    # no comparable score and may not contain a `model` artifact.
    scored_runs = [r for r in runs if r.data.metrics.get("rmse") is not None]
    if not scored_runs:
        print("No se encontraron runs con la métrica 'rmse' en el experimento:", experiment_name)
        return None

    # min() keeps the first run among ties, i.e. the server-side ordering is respected.
    best_run = min(scored_runs, key=lambda r: r.data.metrics["rmse"])
    run_id = best_run.info.run_id
    rmse = best_run.data.metrics.get("rmse")
    print("Mejor run:", run_id, "rmse=", rmse)

    model_uri = f"runs:/{run_id}/model"
    result = mlflow.register_model(model_uri=model_uri, name=model_name)
    version = result.version

    # Try to assign the 'challenger' alias; remember whether it worked so the messages
    # below report what actually happened.
    alias_assigned = False
    try:
        client.set_registered_model_alias(name=model_name, alias="challenger", version=version)
        alias_assigned = True
    except Exception as alias_exc:
        print("No se pudo asignar alias 'challenger'; se intentará marcar la versión con un tag:", alias_exc)
        try:
            # Fallback: mark the version with a tag that identifies it as the challenger.
            client.set_model_version_tag(name=model_name, version=version, key="alias", value="challenger")
        except Exception as exc:
            print("No se pudo asignar alias 'challenger' automáticamente:", exc)

    # Update the version description with a readable timestamp.
    stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    if alias_assigned:
        description = f"Assigned alias 'challenger' on {stamp}"
    else:
        description = f"Registered as challenger candidate (alias not assigned) on {stamp}"
    try:
        client.update_model_version(name=model_name, version=version, description=description)
    except Exception as exc:
        print("No se pudo actualizar la descripción de la versión:", exc)

    if alias_assigned:
        print(f"Modelo registrado: {model_name} v{version} (alias 'challenger' asignado - comprobar en MLflow UI)")
    else:
        print(f"Modelo registrado: {model_name} v{version} (alias 'challenger' NO asignado - revisar en MLflow UI)")
    return {"model_name": model_name, "version": version}


# Register the best run of the experiment as a new version of the registered model;
# the returned dict (model name and version) is displayed as the cell output.
register_best_as_challenger(EXPERIMENT_NAME, "workspace.default.nyc-taxi-model")

Mejor run: 4da81d8531784e338eb8e7b23047c809 rmse= 5.410463874732254


Registered model 'workspace.default.nyc-taxi-model' already exists. Creating a new version of this model...


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Uploading artifacts:   0%|          | 0/8 [00:00<?, ?it/s]

Created version '10' of model 'workspace.default.nyc-taxi-model'.


Modelo registrado: workspace.default.nyc-taxi-model v10 (alias 'challenger' asignado - comprobar en MLflow UI)


{'model_name': 'workspace.default.nyc-taxi-model', 'version': '10'}

#### Evaluar Champion y Challenger sobre marzo 2025

Evalúa los modelos Champion y Challenger sobre los datos de marzo de 2025. Requiere `read_dataframe()`, `preprocess(df, dv)` y un `DictVectorizer` ya ajustado (`dv`). Retorna un dict con `rmse_champion`, `rmse_challenger`, los errores de carga (si los hubo) y un DataFrame resumen (`summary_df`). Si no puede cargar alguno de los modelos, imprime diagnósticos del registro.

In [None]:
def evaluate_champion_challenger_march(march_path="../data/green_tripdata_2025-03.parquet",
                                       dv=None,
                                       model_name="workspace.default.nyc-taxi-model"):
    """Score the 'Champion' and 'challenger' aliases of a registered model on one data file.

    The file is loaded with ``read_dataframe`` and encoded with ``preprocess``
    using ``dv``; each alias is loaded through ``mlflow.pyfunc`` and its RMSE
    against the ``duration`` column is computed.

    Parameters
    ----------
    march_path : str
        Path to the parquet file to evaluate on.
    dv : fitted vectorizer
        Required; the vectorizer the models were trained with.
    model_name : str
        Registered model whose aliases are evaluated.

    Returns
    -------
    dict
        Keys ``rmse_champion``, ``rmse_challenger``, ``champion_error``,
        ``challenger_error``, ``summary_df`` (a DataFrame) and, when both RMSE
        values exist, ``delta`` (challenger minus champion).

    Raises
    ------
    ValueError
        If ``dv`` is None.
    FileNotFoundError
        If ``march_path`` does not exist.
    """
    import os
    import mlflow.pyfunc
    from mlflow import MlflowClient
    import numpy as np
    import pandas as pd
    from sklearn.metrics import mean_squared_error

    if dv is None:
        raise ValueError("Se requiere `dv` (DictVectorizer). Pasa dv=tu_vectorizador.")

    if not os.path.exists(march_path):
        raise FileNotFoundError(f"No se encontró {march_path}. Descarga marzo 2025 en data/ y vuelve a ejecutar.")

    # Prepare the evaluation data.
    df_march = read_dataframe(march_path)
    X_march = preprocess(df_march, dv=dv)
    # The models are scored on a dense array; a sparse matrix is converted here.
    X_input = X_march.toarray() if hasattr(X_march, "toarray") else X_march
    y_march = df_march["duration"].values

    client = MlflowClient()

    def registry_diagnostics():
        """Describe the aliases/versions of `model_name`, or the error hit while looking them up."""
        diag = []
        try:
            # Direct lookup by name instead of scanning every registered model.
            registered = client.get_registered_model(model_name)
            diag.append({"registered_aliases": dict(getattr(registered, "aliases", None) or {})})
            # `latest_versions` may be missing or empty depending on the registry backend.
            for v in getattr(registered, "latest_versions", None) or []:
                diag.append({
                    "version": v.version,
                    "stage": getattr(v, "current_stage", None),
                    "aliases": getattr(v, "aliases", None),
                })
        except Exception as diag_exc:
            # Surface the lookup failure instead of silently returning nothing.
            diag.append({"diagnostics_error": str(diag_exc)})
        return diag

    def try_load_and_score(alias):
        """Load `model_name@alias` and return its RMSE, or the error plus registry diagnostics."""
        uri = f"models:/{model_name}@{alias}"
        try:
            m = mlflow.pyfunc.load_model(uri)
            y_pred = m.predict(X_input)
            rmse = float(np.sqrt(mean_squared_error(y_march, y_pred)))
            return {"rmse": rmse, "error": None}
        except Exception as exc:
            return {"rmse": None, "error": str(exc), "diagnostics": registry_diagnostics()}

    champion_res = try_load_and_score("Champion")
    challenger_res = try_load_and_score("challenger")

    results = {
        "rmse_champion": champion_res.get("rmse"),
        "rmse_challenger": challenger_res.get("rmse"),
        "champion_error": champion_res.get("error"),
        "challenger_error": challenger_res.get("error")
    }

    # Summary DataFrame, one row per role.
    summary = pd.DataFrame([
        {"role": "Champion", "rmse": results["rmse_champion"], "error": results["champion_error"]},
        {"role": "Challenger", "rmse": results["rmse_challenger"], "error": results["challenger_error"]}
    ])

    # Add the comparison only when both RMSE values are available.
    if results["rmse_champion"] is not None and results["rmse_challenger"] is not None:
        results["delta"] = results["rmse_challenger"] - results["rmse_champion"]
        summary["delta_vs_champion"] = summary["rmse"] - results["rmse_champion"]

    # Short report.
    print("Evaluación marzo 2025 — modelo:", model_name)
    print(summary)

    # Diagnostics are only present when loading/scoring an alias failed.
    if champion_res.get("diagnostics"):
        print("Diagnostics Champion:", champion_res["diagnostics"])
    if challenger_res.get("diagnostics"):
        print("Diagnostics Challenger:", challenger_res["diagnostics"])

    results["summary_df"] = summary
    return results

# Score the registered model's "Champion" and "challenger" aliases on the March 2025 file,
# passing the DictVectorizer `dv` fitted earlier in the notebook.
summary_results = evaluate_champion_challenger_march(
    march_path="../data/green_tripdata_2025-03.parquet",
    dv=dv,
    model_name="workspace.default.nyc-taxi-model"   )

Downloading artifacts:   0%|          | 0/8 [00:00<?, ?it/s]

  model.load_model(xgb_model_path)


Downloading artifacts:   0%|          | 0/8 [00:00<?, ?it/s]

  model.load_model(xgb_model_path)


Evaluación marzo 2025 — modelo: workspace.default.nyc-taxi-model
         role       rmse error  delta_vs_champion
0    Champion  13.200961  None                0.0
1  Challenger  13.200961  None                0.0


#### Decisión de promoción y justificación

Decide si promover el challenger a Champion según summary_results (dict que contiene rmse_champion y rmse_challenger). Registra la decisión en un run de MLflow con trazas y tags; intenta asignar alias 'Champion' y/o transicionar la versión a 'Production'. Retorna dict con la decisión y trazas.

In [None]:
def _registry_diagnostics(client, model_name):
    """Return a JSON-friendly description of the model's aliases/versions, or the lookup error."""
    diagnostics = []
    try:
        registered = client.get_registered_model(model_name)
        diagnostics.append({"registered_aliases": dict(getattr(registered, "aliases", None) or {})})
        # `latest_versions` may be missing or empty depending on the registry backend.
        for v in getattr(registered, "latest_versions", None) or []:
            diagnostics.append({
                "version": v.version,
                "stage": getattr(v, "current_stage", None),
                "aliases": list(getattr(v, "aliases", None) or []),
            })
    except Exception as exc:
        diagnostics.append({"error_listing_models": str(exc)})
    return diagnostics


def _find_challenger_version(client, model_name, diagnostics):
    """Return the version marked as 'challenger' (alias first, then tag fallback), or None."""
    # Preferred path: ask the registry directly which version carries the alias.
    try:
        return client.get_model_version_by_alias(model_name, "challenger").version
    except Exception as exc:
        diagnostics.append({"error_alias_lookup": str(exc)})

    # Fallback: scan the listed versions for the alias or for the `alias=challenger` tag.
    try:
        for rm in client.search_registered_models():
            if rm.name != model_name:
                continue
            for v in getattr(rm, "latest_versions", None) or []:
                aliases = getattr(v, "aliases", []) or []
                if "challenger" in [a.lower() for a in aliases]:
                    return v.version
                try:
                    tags = client.get_model_version(name=model_name, version=v.version).tags
                    if tags.get("alias", "").lower() == "challenger":
                        return v.version
                except Exception as exc_tags:
                    diagnostics.append({"error_reading_tags": str(exc_tags), "version": v.version})
    except Exception as exc:
        diagnostics.append({"error_finding_challenger": str(exc)})
    return None


def decide_promotion_and_justify(summary_results,
                                 model_name="workspace.default.nyc-taxi-model",
                                 delta_threshold=0.0,
                                 experiment_name=None,
                                 mlflow_run_name="promotion_decision"):
    """Decide whether the challenger replaces the Champion and record the decision in MLflow.

    The challenger is promoted when ``rmse_challenger - rmse_champion`` is
    strictly below ``-abs(delta_threshold)``. The decision (metrics, tags,
    reason and a JSON dump) is logged in a run named ``mlflow_run_name``.
    Promotion assigns the alias ``Champion`` to the challenger version and, if
    that fails, falls back to a stage transition to ``Production``.

    Parameters
    ----------
    summary_results : dict
        Must provide ``rmse_champion`` and ``rmse_challenger`` (either may be None).
    model_name : str
        Registered model to act on.
    delta_threshold : float
        Minimum RMSE improvement required (its absolute value is used).
    experiment_name : str or None
        When given, the decision run is logged to this experiment; when None the
        currently active experiment is used.
    mlflow_run_name : str
        Name of the decision run.

    Returns
    -------
    dict
        The decision: ``model_name``, ``rmse_champion``, ``rmse_challenger``,
        ``delta``, ``promote``, ``reason``, ``timestamp``, ``diagnostics`` and,
        when both RMSE values were available, ``promote_result`` (plus
        ``promoted_version`` after a successful promotion).
    """
    import mlflow
    from mlflow import MlflowClient
    from datetime import datetime

    client = MlflowClient()

    # Honour the caller's experiment instead of silently ignoring the argument.
    if experiment_name is not None:
        mlflow.set_experiment(experiment_name)

    rmse_champ = summary_results.get("rmse_champion")
    rmse_chall = summary_results.get("rmse_challenger")

    decision = {
        "model_name": model_name,
        "rmse_champion": rmse_champ,
        "rmse_challenger": rmse_chall,
        "delta": None,
        "promote": False,
        "reason": None,
        "timestamp": datetime.now().isoformat(),
        "diagnostics": []
    }

    if rmse_champ is None or rmse_chall is None:
        decision["reason"] = "Falta rmse de Champion o Challenger"
        # Attach registry diagnostics to explain why a score may be missing.
        decision["diagnostics"].extend(_registry_diagnostics(client, model_name))

        # Log the (negative) decision and stop.
        with mlflow.start_run(run_name=mlflow_run_name):
            mlflow.set_tag("decision_flow", "promotion_check")
            mlflow.log_dict(decision, "promotion_decision.json")
        return decision

    decision["delta"] = rmse_chall - rmse_champ

    # Rule: promote when the challenger beats the Champion by more than the threshold (lower RMSE is better).
    if decision["delta"] < -abs(delta_threshold):
        decision["promote"] = True
        decision["reason"] = f"Challenger mejora Champion por {abs(decision['delta']):.6f} (> {delta_threshold})"
    else:
        decision["promote"] = False
        decision["reason"] = f"No hay mejora significativa (delta={decision['delta']:.6f}, threshold={delta_threshold})"

    challenger_version = _find_challenger_version(client, model_name, decision["diagnostics"])

    # Record the decision run (trace) and its tags.
    with mlflow.start_run(run_name=mlflow_run_name):
        mlflow.set_tag("decision_flow", "promotion_check")
        mlflow.set_tag("model_name", model_name)
        mlflow.log_metric("rmse_champion", float(rmse_champ))
        mlflow.log_metric("rmse_challenger", float(rmse_chall))
        mlflow.log_metric("delta", float(decision["delta"]))
        # Outcome of the RMSE rule itself; `promote_result` below records what happened in the registry.
        mlflow.set_tag("promote_decision", str(decision["promote"]))

        if not decision["promote"]:
            promote_result = "not_promoted"
        elif challenger_version is None:
            decision["promote"] = False
            decision["reason"] = "Queríamos promover pero no se encontró versión 'challenger' en el registro"
            promote_result = "no_version_found"
        else:
            promoted = False
            try:
                # Preferred: move the 'Champion' alias to the challenger version.
                client.set_registered_model_alias(name=model_name, alias="Champion", version=challenger_version)
                promoted = True
            except Exception as exc_alias:
                decision["diagnostics"].append({"alias_promotion_error": str(exc_alias)})
                # Fallback: transition to Production (archiving the existing versions).
                try:
                    client.transition_model_version_stage(name=model_name,
                                                          version=challenger_version,
                                                          stage="Production",
                                                          archive_existing_versions=True)
                    promoted = True
                except Exception as exc_stage:
                    decision["diagnostics"].append({"promotion_error": str(exc_stage)})

            if promoted:
                promote_result = "success"
                decision["promoted_version"] = challenger_version
                decision["reason"] = f"Promovido challenger v{challenger_version} a Champion/Production"
            else:
                promote_result = "failed"

        decision["promote_result"] = promote_result
        mlflow.set_tag("promote_result", promote_result)

        # Log the reason and the JSON detail last, so the artifacts reflect the final outcome
        # (including a successful promotion or the errors collected while attempting it).
        mlflow.log_text(decision["reason"], "decision_reason.txt")
        mlflow.log_dict(decision, "promotion_decision.json")

    return decision

# Apply the promotion rule to the evaluation results; with delta_threshold=0.0 the challenger
# must have a strictly lower RMSE than the Champion to be promoted.
decision = decide_promotion_and_justify(
    summary_results,
    model_name="workspace.default.nyc-taxi-model",
    delta_threshold=0.0,
    experiment_name=EXPERIMENT_NAME    
    )

🏃 View run promotion_decision at: https://dbc-00d2933e-633b.cloud.databricks.com/ml/experiments/1858882611825285/runs/f79ee599785c4d2e86b06aa28241b664
🧪 View experiment at: https://dbc-00d2933e-633b.cloud.databricks.com/ml/experiments/1858882611825285
