In [1]:
import dagshub
import os
import mlflow
import mlflow.sklearn
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, root_mean_squared_error
import numpy as np
import pandas as pd
from sklearn.feature_extraction import DictVectorizer
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope
from dagshub import get_repo_bucket_client


In [2]:
dagshub.init(url="https://dagshub.com/colome8/nyc-taxi-time-prediction", mlflow=True)

MLFLOW_TRACKING_URI = mlflow.get_tracking_uri()

print(MLFLOW_TRACKING_URI)

mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
mlflow.set_experiment(experiment_name="nyc-taxi-experiment")

https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow


<Experiment: artifact_location='mlflow-artifacts:/52f4281e23894b08be828e86a3d02bc9', creation_time=1726632055317, experiment_id='0', last_update_time=1726632055317, lifecycle_stage='active', name='nyc-taxi-experiment', tags={}>

In [3]:
def read_dataframe(filename):
    df = pd.read_parquet(filename)
    df['duration'] = df.lpep_dropoff_datetime - df.lpep_pickup_datetime
    df.duration = df.duration.apply(lambda td: td.total_seconds() / 60)
    df = df[(df.duration >= 1) & (df.duration <= 60)]
    categorical = ['PULocationID', 'DOLocationID']
    df[categorical] = df[categorical].astype(str)
    df['PU_DO'] = df['PULocationID'] + '_' + df['DOLocationID']
    return df

In [4]:
df_train = read_dataframe('../data/green_tripdata_2024-01.parquet')
df_val = read_dataframe('../data/green_tripdata_2024-02.parquet')

In [5]:
df_train['PU_DO'] = df_train['PULocationID'] + '_' + df_train['DOLocationID']
df_val['PU_DO'] = df_val['PULocationID'] + '_' + df_val['DOLocationID']

In [6]:
categorical = ['PU_DO']
numerical = ['trip_distance']
target = 'duration'


In [7]:
dv = DictVectorizer()

train_dicts = df_train[categorical + numerical].to_dict(orient='records')
X_train = dv.fit_transform(train_dicts)

val_dicts = df_val[categorical + numerical].to_dict(orient='records')
X_val = dv.transform(val_dicts)

In [8]:
target = 'duration'
y_train = df_train[target].values
y_val = df_val[target].values

In [9]:
training_dataset = mlflow.data.from_numpy(X_train.data, targets=y_train, name="green_tripdata_2024-01")
validation_dataset = mlflow.data.from_numpy(X_val.data, targets=y_val, name="green_tripdata_2024-02")

In [10]:
mlflow.sklearn.autolog()



### Gradient Boosting

In [11]:
import os
import mlflow
import dagshub
from mlflow.tracking import MlflowClient

# Configurar DagsHub y MLflow
dagshub.init(repo_owner="colome8", repo_name="nyc-taxi-time-prediction", mlflow=True)

# Obtener la URI de seguimiento de MLflow
mlflow_tracking_uri = mlflow.get_tracking_uri()
print(f"MLflow Tracking URI: {mlflow_tracking_uri}")

# Configurar MLflow para usar la URI de DagsHub
mlflow.set_tracking_uri(mlflow_tracking_uri)

# Nombre del experimento
experiment_name = "nyc-taxi-experiment"

# Crear o obtener el experimento
client = MlflowClient()
try:
    experiment = client.create_experiment(experiment_name)
except mlflow.exceptions.MlflowException:
    experiment = client.get_experiment_by_name(experiment_name)

if experiment:
    print(f"Experiment ID: {experiment.experiment_id}")
    print(f"Artifact Location: {experiment.artifact_location}")
    
    # Configurar el experimento activo
    mlflow.set_experiment(experiment_name)
else:
    print("Failed to create or retrieve the experiment.")

MLflow Tracking URI: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow
Experiment ID: 0
Artifact Location: mlflow-artifacts:/52f4281e23894b08be828e86a3d02bc9


In [12]:
# Función para ejecutar experimentos
def objective(params, model_class, X_train, y_train, X_val, y_val):
    with mlflow.start_run(nested=True):
        model = model_class(**params)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_val)
        rmse = root_mean_squared_error(y_val, y_pred)
        mlflow.log_params(params)
        mlflow.log_metric("rmse", rmse)
        return {'loss': rmse, 'status': STATUS_OK}

In [13]:
# Espacios de búsqueda para hiperparámetros
gb_space = {
    'n_estimators': scope.int(hp.quniform('n_estimators', 100, 500, 50)),
    'learning_rate': hp.loguniform('learning_rate', np.log(0.01), np.log(0.2)),
    'max_depth': scope.int(hp.quniform('max_depth', 3, 10, 1))
}

rf_space = {
    'n_estimators': scope.int(hp.quniform('n_estimators', 100, 500, 50)),
    'max_depth': scope.int(hp.quniform('max_depth', 3, 20, 1)),
    'min_samples_split': scope.int(hp.quniform('min_samples_split', 2, 10, 1))
}


In [14]:
# Experimentos
with mlflow.start_run(run_name="Parent Experiment"):
    # Gradient Boost
    gb_trials = Trials()
    best_gb = fmin(
        fn=lambda params: objective(params, GradientBoostingRegressor, X_train, y_train, X_val, y_val),
        space=gb_space,
        algo=tpe.suggest,
        max_evals=20,
        trials=gb_trials
    )
    
    # Random Forest
    rf_trials = Trials()
    best_rf = fmin(
        fn=lambda params: objective(params, RandomForestRegressor, X_train, y_train, X_val, y_val),
        space=rf_space,
        algo=tpe.suggest,
        max_evals=20,
        trials=rf_trials
    )

    # Seleccionar el mejor modelo
    gb_rmse = min(trial['result']['loss'] for trial in gb_trials.trials)
    rf_rmse = min(trial['result']['loss'] for trial in rf_trials.trials)

    if gb_rmse < rf_rmse:
        best_model = GradientBoostingRegressor(**best_gb)
        best_model_name = "GradientBoost"
    else:
        best_model = RandomForestRegressor(**best_rf)
        best_model_name = "RandomForest"

    # Entrenar el mejor modelo con todos los datos
    best_model.fit(X_train, y_train)

    # Registrar el mejor modelo
    mlflow.sklearn.log_model(best_model, "nyc-taxi-model")
    
    # Asignar el alias 'challenger'
    client = mlflow.tracking.MlflowClient()
    model_version = client.create_model_version("nyc-taxi-model", f"runs:/{mlflow.active_run().info.run_id}/nyc-taxi-model")
    client.set_registered_model_alias("nyc-taxi-model", "challenger", model_version.version)

  0%|          | 0/20 [00:00<?, ?trial/s, best loss=?]


2024/11/12 21:19:30 INFO mlflow.tracking._tracking_service.client: 🏃 View run sedate-shark-353 at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0/runs/6398fd642654432792c53e220bd6db3e.

2024/11/12 21:19:31 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0.



  5%|▌         | 1/20 [02:03<39:13, 123.89s/trial, best loss: 5.240192172926639]


2024/11/12 21:20:47 INFO mlflow.tracking._tracking_service.client: 🏃 View run dapper-swan-601 at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0/runs/119ffdfd31c140fbaaead3cdf1d64ddc.

2024/11/12 21:20:47 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0.



 10%|█         | 2/20 [03:20<28:44, 95.83s/trial, best loss: 5.240192172926639] 


2024/11/12 21:22:35 INFO mlflow.tracking._tracking_service.client: 🏃 View run luminous-foal-717 at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0/runs/c06b65438c4c4a369b24dc0f3643c3cb.

2024/11/12 21:22:35 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0.



 15%|█▌        | 3/20 [05:08<28:47, 101.64s/trial, best loss: 5.240192172926639]


2024/11/12 21:23:21 INFO mlflow.tracking._tracking_service.client: 🏃 View run overjoyed-swan-25 at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0/runs/1261ae8c376544a0b0bf99baffd15c8a.

2024/11/12 21:23:21 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0.



 20%|██        | 4/20 [05:54<21:13, 79.62s/trial, best loss: 5.240192172926639] 


2024/11/12 21:24:48 INFO mlflow.tracking._tracking_service.client: 🏃 View run omniscient-boar-201 at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0/runs/86f412b65a944e08a47c45506b909bfb.

2024/11/12 21:24:48 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0.



 25%|██▌       | 5/20 [07:21<20:35, 82.36s/trial, best loss: 5.240192172926639]


2024/11/12 21:26:44 INFO mlflow.tracking._tracking_service.client: 🏃 View run serious-steed-292 at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0/runs/19aecd4842424abebcfd1b5c15c29151.

2024/11/12 21:26:44 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0.



 30%|███       | 6/20 [09:17<21:52, 93.73s/trial, best loss: 5.240192172926639]


2024/11/12 21:27:52 INFO mlflow.tracking._tracking_service.client: 🏃 View run funny-loon-417 at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0/runs/9d5acceca40e43dba2327c6e2f6fcecd.

2024/11/12 21:27:52 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0.



 35%|███▌      | 7/20 [10:25<18:28, 85.29s/trial, best loss: 5.240192172926639]


2024/11/12 21:28:38 INFO mlflow.tracking._tracking_service.client: 🏃 View run blushing-pug-443 at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0/runs/a04112fa874440a79fbe25def7333e3c.

2024/11/12 21:28:38 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0.



 40%|████      | 8/20 [11:11<14:34, 72.87s/trial, best loss: 5.240192172926639]


2024/11/12 21:30:10 INFO mlflow.tracking._tracking_service.client: 🏃 View run blushing-snail-461 at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0/runs/01e3be537cc943ca98714b02a6a4aa48.

2024/11/12 21:30:10 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0.



 45%|████▌     | 9/20 [12:43<14:25, 78.71s/trial, best loss: 5.240192172926639]


2024/11/12 21:30:42 INFO mlflow.tracking._tracking_service.client: 🏃 View run enthused-jay-189 at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0/runs/d6f74c485cc14f9791f90a09913293b2.

2024/11/12 21:30:42 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0.



 50%|█████     | 10/20 [13:15<10:43, 64.39s/trial, best loss: 5.240192172926639]


2024/11/12 21:31:44 INFO mlflow.tracking._tracking_service.client: 🏃 View run puzzled-ray-145 at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0/runs/5533bf59a0e5400486ec21fb5679ddcb.

2024/11/12 21:31:44 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0.



 55%|█████▌    | 11/20 [14:17<09:33, 63.68s/trial, best loss: 5.240192172926639]


2024/11/12 21:33:01 INFO mlflow.tracking._tracking_service.client: 🏃 View run crawling-shark-622 at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0/runs/41c6e75437034a2f8702881841318664.

2024/11/12 21:33:01 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0.



 60%|██████    | 12/20 [15:34<09:01, 67.75s/trial, best loss: 5.240192172926639]


2024/11/12 21:33:48 INFO mlflow.tracking._tracking_service.client: 🏃 View run fortunate-conch-762 at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0/runs/18f15bc6e210450aa2724bb1081316b9.

2024/11/12 21:33:48 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0.



 65%|██████▌   | 13/20 [16:21<07:09, 61.35s/trial, best loss: 5.240192172926639]


2024/11/12 21:34:25 INFO mlflow.tracking._tracking_service.client: 🏃 View run intrigued-trout-293 at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0/runs/e9c95edfa6aa4e74a4a8bb238bd85fdc.

2024/11/12 21:34:25 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0.



 70%|███████   | 14/20 [16:58<05:23, 53.93s/trial, best loss: 5.240192172926639]


2024/11/12 21:35:32 INFO mlflow.tracking._tracking_service.client: 🏃 View run zealous-hawk-103 at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0/runs/9b5bf9241b794b1c8f75c51353e963a2.

2024/11/12 21:35:32 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0.



 75%|███████▌  | 15/20 [18:05<04:50, 58.04s/trial, best loss: 5.240192172926639]


2024/11/12 21:36:17 INFO mlflow.tracking._tracking_service.client: 🏃 View run angry-grouse-292 at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0/runs/2824bc1c48ba4a7fa447bd102bb6d63d.

2024/11/12 21:36:17 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0.



 80%|████████  | 16/20 [18:50<03:35, 53.99s/trial, best loss: 5.240192172926639]


2024/11/12 21:36:52 INFO mlflow.tracking._tracking_service.client: 🏃 View run big-grub-327 at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0/runs/4a93ffa9c153444a8210b887416f5b4a.

2024/11/12 21:36:52 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0.



 85%|████████▌ | 17/20 [19:24<02:24, 48.19s/trial, best loss: 5.240192172926639]


2024/11/12 21:37:27 INFO mlflow.tracking._tracking_service.client: 🏃 View run adventurous-rat-594 at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0/runs/875f6b92e22c4885a7cc4ecda0974beb.

2024/11/12 21:37:27 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0.



 90%|█████████ | 18/20 [20:00<01:28, 44.46s/trial, best loss: 5.240192172926639]


2024/11/12 21:38:44 INFO mlflow.tracking._tracking_service.client: 🏃 View run funny-gnat-445 at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0/runs/ff7ce94f8aa14349ad02439d2618e44f.

2024/11/12 21:38:44 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0.



 95%|█████████▌| 19/20 [21:17<00:54, 54.07s/trial, best loss: 5.240192172926639]


2024/11/12 21:40:21 INFO mlflow.tracking._tracking_service.client: 🏃 View run brawny-cod-574 at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0/runs/443866ae4bf546c782a1e5fbd2092f84.

2024/11/12 21:40:21 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0.



100%|██████████| 20/20 [22:54<00:00, 68.73s/trial, best loss: 5.240192172926639]
  0%|          | 0/20 [00:00<?, ?trial/s, best loss=?]


2024/11/12 21:43:22 INFO mlflow.tracking._tracking_service.client: 🏃 View run legendary-lark-655 at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0/runs/ce3c86ba00eb422db41b2393ac443f64.

2024/11/12 21:43:22 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0.



  5%|▌         | 1/20 [02:59<56:59, 179.95s/trial, best loss: 5.442489763619475]


2024/11/12 21:45:50 INFO mlflow.tracking._tracking_service.client: 🏃 View run hilarious-zebra-14 at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0/runs/9d445d26b7ac41c794bc41079c860b00.

2024/11/12 21:45:50 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0.



 10%|█         | 2/20 [05:28<48:23, 161.33s/trial, best loss: 5.405641239808438]


2024/11/12 21:49:23 INFO mlflow.tracking._tracking_service.client: 🏃 View run rebellious-stag-510 at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0/runs/ec063bb34fc0468bb4985c2dd8f68c7a.

2024/11/12 21:49:23 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0.



 15%|█▌        | 3/20 [09:01<52:25, 185.05s/trial, best loss: 5.405641239808438]


2024/11/12 21:52:23 INFO mlflow.tracking._tracking_service.client: 🏃 View run delightful-cat-380 at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0/runs/0950d88a47104bb99be0f93de6abdc69.

2024/11/12 21:52:23 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0.



 20%|██        | 4/20 [12:00<48:45, 182.83s/trial, best loss: 5.405641239808438]


2024/11/12 21:52:59 INFO mlflow.tracking._tracking_service.client: 🏃 View run nimble-shrike-985 at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0/runs/a67b48ee00c0462eb1398fcf021ccc29.

2024/11/12 21:52:59 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0.



 25%|██▌       | 5/20 [12:36<32:27, 129.83s/trial, best loss: 5.405641239808438]


2024/11/12 21:54:32 INFO mlflow.tracking._tracking_service.client: 🏃 View run shivering-ape-891 at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0/runs/809e1c6805254646a9b04b2f323006d3.

2024/11/12 21:54:32 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0.



 30%|███       | 6/20 [14:10<27:26, 117.63s/trial, best loss: 5.405641239808438]


2024/11/12 21:55:09 INFO mlflow.tracking._tracking_service.client: 🏃 View run likeable-doe-836 at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0/runs/c36eb3ae2aca48fea1869541779b5055.

2024/11/12 21:55:09 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0.



 35%|███▌      | 7/20 [14:47<19:43, 91.05s/trial, best loss: 5.405641239808438] 


2024/11/12 21:56:00 INFO mlflow.tracking._tracking_service.client: 🏃 View run bemused-panda-434 at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0/runs/3e9e3c59fc5f4a57b3ea873ec6d6ef41.

2024/11/12 21:56:00 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0.



 40%|████      | 8/20 [15:38<15:40, 78.37s/trial, best loss: 5.405641239808438]


2024/11/12 21:56:53 INFO mlflow.tracking._tracking_service.client: 🏃 View run auspicious-skink-218 at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0/runs/23a9348da3164784912b4b63437e7584.

2024/11/12 21:56:53 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0.



 45%|████▌     | 9/20 [16:30<12:53, 70.34s/trial, best loss: 5.405641239808438]


2024/11/12 21:59:20 INFO mlflow.tracking._tracking_service.client: 🏃 View run tasteful-mink-128 at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0/runs/acc23b5ad74544b89372a923c51c1785.

2024/11/12 21:59:20 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0.



 50%|█████     | 10/20 [18:58<15:41, 94.19s/trial, best loss: 5.405641239808438]


2024/11/13 12:38:31 INFO mlflow.tracking._tracking_service.client: 🏃 View run awesome-steed-976 at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0/runs/984f09c77e4b40208c085f1c56a157c0.

2024/11/13 12:38:31 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0.



 55%|█████▌    | 11/20 [14:58:08<40:31:27, 16209.71s/trial, best loss: 5.405641239808438]


2024/11/13 12:40:22 INFO mlflow.tracking._tracking_service.client: 🏃 View run traveling-snail-96 at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0/runs/11d924f41e43404583bb284ab1683a1c.

2024/11/13 12:40:22 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0.



 60%|██████    | 12/20 [14:59:59<25:08:18, 11312.29s/trial, best loss: 5.405641239808438]


2024/11/13 12:43:12 INFO mlflow.tracking._tracking_service.client: 🏃 View run youthful-dove-371 at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0/runs/b2f0040acb0345958fdfea304ef8dddc.

2024/11/13 12:43:12 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0.



 65%|██████▌   | 13/20 [15:02:50<15:25:59, 7937.05s/trial, best loss: 5.405641239808438] 


2024/11/13 12:45:56 INFO mlflow.tracking._tracking_service.client: 🏃 View run abundant-trout-488 at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0/runs/549c2ea26fd24d609b6e8e4f435a4c4a.

2024/11/13 12:45:56 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0.



 70%|███████   | 14/20 [15:05:33<9:18:54, 5589.02s/trial, best loss: 5.39879397171728]  


2024/11/13 12:46:43 INFO mlflow.tracking._tracking_service.client: 🏃 View run puzzled-rat-126 at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0/runs/3c02409d6ccd446f9d67ff97834eabf9.

2024/11/13 12:46:43 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0.



 75%|███████▌  | 15/20 [15:06:21<5:26:33, 3918.73s/trial, best loss: 5.39879397171728]


2024/11/13 12:49:26 INFO mlflow.tracking._tracking_service.client: 🏃 View run aged-skunk-442 at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0/runs/9d8a9049ea8e4f25bcd92b23393ed036.

2024/11/13 12:49:26 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0.



 80%|████████  | 16/20 [15:09:04<3:05:52, 2788.21s/trial, best loss: 5.39879397171728]


2024/11/13 12:51:20 INFO mlflow.tracking._tracking_service.client: 🏃 View run ambitious-ox-729 at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0/runs/06c2286b48f4455eb0ed5f12aca16192.

2024/11/13 12:51:20 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0.



 85%|████████▌ | 17/20 [15:10:58<1:39:12, 1984.07s/trial, best loss: 5.39879397171728]


2024/11/13 12:52:23 INFO mlflow.tracking._tracking_service.client: 🏃 View run likeable-mule-979 at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0/runs/817adde0c57940b6893d53dc12967eb7.

2024/11/13 12:52:23 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0.



 90%|█████████ | 18/20 [15:12:01<46:53, 1406.80s/trial, best loss: 5.39879397171728]  


2024/11/13 12:53:31 INFO mlflow.tracking._tracking_service.client: 🏃 View run unleashed-fowl-208 at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0/runs/df57319f127d419bba56ba9937123b4c.

2024/11/13 12:53:31 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0.



 95%|█████████▌| 19/20 [15:13:09<16:44, 1004.68s/trial, best loss: 5.39879397171728]


2024/11/13 12:54:29 INFO mlflow.tracking._tracking_service.client: 🏃 View run angry-roo-115 at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0/runs/46d3cecc573d40909f5e33a372229f09.

2024/11/13 12:54:29 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0.



100%|██████████| 20/20 [15:14:07<00:00, 2742.38s/trial, best loss: 5.39879397171728]


2024/11/13 12:54:31 INFO mlflow.tracking._tracking_service.client: 🏃 View run Parent Experiment at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0/runs/8a75041f3c394537ade24a79f0fbcb90.
2024/11/13 12:54:31 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/colome8/nyc-taxi-time-prediction.mlflow/#/experiments/0.


InvalidParameterError: The 'max_depth' parameter of GradientBoostingRegressor must be an int in the range [1, inf) or None. Got np.float64(9.0) instead.

In [None]:
import mlflow
from mlflow.exceptions import MlflowException
from sklearn.metrics import root_mean_squared_error

# Función para cargar un modelo de forma segura
def safe_load_model(model_name, stage):
    try:
        return mlflow.sklearn.load_model(f"models:/{model_name}@{stage}")
    except MlflowException as e:
        print(f"Error loading {stage} model: {e}")
        return None

# Evaluar modelos en el conjunto de prueba
champion_model = safe_load_model("nyc-taxi-model", "champion")
challenger_model = safe_load_model("nyc-taxi-model", "challenger")

if champion_model and challenger_model:
    champion_rmse = root_mean_squared_error(y_test, champion_model.predict(X_test))
    challenger_rmse = root_mean_squared_error(y_test, challenger_model.predict(X_test))

    print(f"Champion RMSE: {champion_rmse}")
    print(f"Challenger RMSE: {challenger_rmse}")

    # Decidir si promover el challenger a champion
    if challenger_rmse < champion_rmse * 0.95:
        print("El challenger supera significativamente al champion y debería ser promovido.")
        client.set_registered_model_alias("nyc-taxi-model", "champion", model_version.version)
    else:
        print("El challenger no supera significativamente al champion. Mantenemos el champion actual.")
else:
    print("No se pudieron cargar uno o ambos modelos. Verifica que existan en el Model Registry.")

# Si no hay un modelo champion, podríamos querer promover el challenger automáticamente
if not champion_model and challenger_model:
    print("No existe un modelo champion. Promoviendo el challenger a champion.")
    client.set_registered_model_alias("nyc-taxi-model", "champion", model_version.version)

# Asegúrate de que el challenger siempre se registre
if challenger_model:
    client.set_registered_model_alias("nyc-taxi-model", "challenger", model_version.version)
else:
    print("No se pudo registrar el modelo challenger. Verifica el proceso de entrenamiento y registro.")

In [None]:
# Decidir si promover el challenger a champion
if challenger_rmse < champion_rmse * 0.95:
    print("El challenger supera significativamente al champion y debería ser promovido.")
    client.set_registered_model_alias("nyc-taxi-model", "champion", model_version.version)
else:
    print("El challenger no supera significativamente al champion. Mantenemos el champion actual.")


In [None]:
# Subir datos a DagsHub
s3 = get_repo_bucket_client("colome8/nyc-taxi-time-prediction")

s3.upload_file(
    Bucket="nys-taxi-time-prediction",
    Filename="../data/green_tripdata_2024-03.parquet",
    Key="test_data.parquet",
)