In [1]:
import os, mlflow
from dotenv import load_dotenv

load_dotenv(override=True)  # Carga las variables del archivo .env
EXPERIMENT_NAME = "/Users/pipochatgpt@gmail.com/nyc-taxi-experiments"

mlflow.set_tracking_uri("databricks")
experiment = mlflow.set_experiment(experiment_name=EXPERIMENT_NAME)

2025/10/21 21:00:35 INFO mlflow.tracking.fluent: Experiment with name '/Users/pipochatgpt@gmail.com/nyc-taxi-experiments' does not exist. Creating a new experiment.


In [2]:
import pickle
import pandas as pd
from sklearn.metrics import  root_mean_squared_error
from sklearn.feature_extraction import  DictVectorizer

In [3]:
def read_dataframe(filename):

    df = pd.read_parquet(filename)

    df['duration'] = df.lpep_dropoff_datetime - df.lpep_pickup_datetime
    df.duration = df.duration.apply(lambda td: td.total_seconds() / 60)

    df = df[(df.duration >= 1) & (df.duration <= 60)]

    categorical = ['PULocationID', 'DOLocationID']
    df[categorical] = df[categorical].astype(str)

    return df

In [4]:
df_train = read_dataframe('../data/green_tripdata_2025-01.parquet')
df_val = read_dataframe('../data/green_tripdata_2025-02.parquet')

In [5]:
def preprocess(df, dv):
    df['PU_DO'] = df['PULocationID'] + '_' + df['DOLocationID']
    categorical = ['PU_DO']
    numerical = ['trip_distance']
    train_dicts = df[categorical + numerical].to_dict(orient='records')
    return dv.transform(train_dicts)

In [6]:
categorical = ['PULocationID', 'DOLocationID']
numerical = ['trip_distance']
dv = DictVectorizer()

train_dicts = df_train[categorical + numerical].to_dict(orient='records')
X_train = dv.fit_transform(train_dicts)

X_val = preprocess(df_val, dv)

In [7]:
target = 'duration'
y_train = df_train[target].values
y_val = df_val[target].values

In [8]:
training_dataset = mlflow.data.from_numpy(X_train.data, targets=y_train, name="green_tripdata_2025-01")
validation_dataset = mlflow.data.from_numpy(X_val.data, targets=y_val, name="green_tripdata_2025-02")

In [9]:
import math
import optuna
import pathlib
import xgboost as xgb
from optuna.samplers import TPESampler
from mlflow.models.signature import infer_signature

In [10]:
train = xgb.DMatrix(X_train, label=y_train)
valid = xgb.DMatrix(X_val, label=y_val)

In [11]:
# ------------------------------------------------------------
# Definir la función objetivo para Optuna
#    - Recibe un `trial`, que se usa para proponer hiperparámetros.
#    - Entrena un modelo con esos hiperparámetros.
#    - Calcula la métrica de validación (RMSE) y la retorna (Optuna la minimizará).
#    - Abrimos un run anidado de MLflow para registrar cada trial.
# ------------------------------------------------------------
def objective(trial: optuna.trial.Trial):
    # Hiperparámetros MUESTREADOS por Optuna en CADA trial.
    # Nota: usamos log=True para emular rangos log-uniformes (similar a loguniform).
    params = {
        "max_depth": trial.suggest_int("max_depth", 4, 100),
        "learning_rate": trial.suggest_float("learning_rate", math.exp(-3), 1.0, log=True),
        "reg_alpha": trial.suggest_float("reg_alpha",   math.exp(-5), math.exp(-1), log=True),
        "reg_lambda": trial.suggest_float("reg_lambda", math.exp(-6), math.exp(-1), log=True),
        "min_child_weight": trial.suggest_float("min_child_weight", math.exp(-1), math.exp(3), log=True),
        "objective": "reg:squarederror",  
        "seed": 42,                      
    }

    # Run anidado para dejar rastro de cada trial en MLflow
    with mlflow.start_run(nested=True):
        mlflow.set_tag("model_family", "xgboost")  # etiqueta informativa
        mlflow.log_params(params)                  # registra hiperparámetros del trial

        # Entrenamiento con early stopping en el conjunto de validación
        booster = xgb.train(
            params=params,
            dtrain=train,
            num_boost_round=100,
            evals=[(valid, "validation")],
            early_stopping_rounds=10,
        )

        # Predicción y métrica en validación
        y_pred = booster.predict(valid)
        rmse = root_mean_squared_error(y_val, y_pred)

        # Registrar la métrica principal
        mlflow.log_metric("rmse", rmse)

        # La "signature" describe la estructura esperada de entrada y salida del modelo:
        # incluye los nombres, tipos y forma (shape) de las variables de entrada y el tipo de salida.
        # MLflow la usa para validar datos en inferencia y documentar el modelo en el Model Registry.
        signature = infer_signature(X_val, y_pred)

        # Guardar el modelo del trial como artefacto en MLflow.
        mlflow.xgboost.log_model(
            booster,
            name="model",
            input_example=X_val[:5],
            signature=signature
        )

    # Optuna minimiza el valor retornado
    return rmse

In [None]:
mlflow.xgboost.autolog(log_models=False)

# ------------------------------------------------------------
# Crear el estudio de Optuna
#    - Usamos TPE (Tree-structured Parzen Estimator) como sampler.
#    - direction="minimize" porque queremos minimizar el RMSE.
# ------------------------------------------------------------
sampler = TPESampler(seed=42)
study = optuna.create_study(direction="minimize", sampler=sampler)

# ------------------------------------------------------------
# Ejecutar la optimización (n_trials = número de intentos)
#    - Cada trial ejecuta la función objetivo con un set distinto de hiperparámetros.
#    - Abrimos un run "padre" para agrupar toda la búsqueda.
# ------------------------------------------------------------
with mlflow.start_run(run_name="XGBoost Hyperparameter Optimization (Optuna)", nested=True):
    study.optimize(objective, n_trials=10)

    # --------------------------------------------------------
    # Recuperar y registrar los mejores hiperparámetros
    # --------------------------------------------------------
    best_params = study.best_params
    # Asegurar tipos/campos fijos (por claridad y consistencia)
    best_params["max_depth"] = int(best_params["max_depth"])
    best_params["seed"] = 42
    best_params["objective"] = "reg:squarederror"

    mlflow.log_params(best_params)

    # Etiquetas del run "padre" (metadatos del experimento)
    mlflow.set_tags({
        "project": "NYC Taxi Time Prediction Project",
        "optimizer_engine": "optuna",
        "model_family": "xgboost",
        "feature_set_version": 1,
    })

    # --------------------------------------------------------
    # 7) Entrenar un modelo FINAL con los mejores hiperparámetros
    #    (normalmente se haría sobre train+val o con CV; aquí mantenemos el patrón original)
    # --------------------------------------------------------
    booster = xgb.train(
        params=best_params,
        dtrain=train,
        num_boost_round=100,
        evals=[(valid, "validation")],
        early_stopping_rounds=10,
    )

    # Evaluar y registrar la métrica final en validación
    y_pred = booster.predict(valid)
    rmse = root_mean_squared_error(y_val, y_pred)
    mlflow.log_metric("rmse", rmse)

    # --------------------------------------------------------
    # 8) Guardar artefactos adicionales (p. ej. el preprocesador)
    # --------------------------------------------------------
    pathlib.Path("preprocessor").mkdir(exist_ok=True)
    with open("preprocessor/preprocessor.b", "wb") as f_out:
        pickle.dump(dv, f_out)

    mlflow.log_artifact("preprocessor/preprocessor.b", artifact_path="preprocessor")

    # La "signature" describe la estructura esperada de entrada y salida del modelo:
    # incluye los nombres, tipos y forma (shape) de las variables de entrada y el tipo de salida.
    # MLflow la usa para validar datos en inferencia y documentar el modelo en el Model Registry.
    # Si X_val es la matriz dispersa (scipy.sparse) salida de DictVectorizer:
    feature_names = dv.get_feature_names_out()
    input_example = pd.DataFrame(X_val[:5].toarray(), columns=feature_names)

    # Para que las longitudes coincidan, usa el mismo slice en y_pred
    signature = infer_signature(input_example, y_val[:5])

    # Guardar el modelo del trial como artefacto en MLflow.
    mlflow.xgboost.log_model(
        booster,
        name="model",
        input_example=input_example,
        signature=signature
    )

# **Registrar modelo en Model Registry**

In [13]:
model_name = "workspace.default.nyc-taxi-model"

## **De forma Manual**

In [14]:
run_id = input("Ingrese el run_id")
run_uri = f"runs:/{run_id}/model"

result = mlflow.register_model(
    model_uri=run_uri,
    name="workspace.default.nyc-taxi-model"
)

Successfully registered model 'workspace.default.nyc-taxi-model'.


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Uploading artifacts:   0%|          | 0/8 [00:00<?, ?it/s]

Created version '1' of model 'workspace.default.nyc-taxi-model'.


## **De forma Automática**

In [15]:
runs = mlflow.search_runs(
    experiment_names=[EXPERIMENT_NAME],
    order_by=["metrics.rmse ASC"],
    output_format="list"
)

# Obtener el mejor run
if len(runs) > 0:
    best_run = runs[0]
    print("🏆 Champion Run encontrado:")
    print(f"Run ID: {best_run.info.run_id}")
    print(f"RMSE: {best_run.data.metrics['rmse']}")
    print(f"Params: {best_run.data.params}")
else:
    print("⚠️ No se encontraron runs con métrica RMSE.")

🏆 Champion Run encontrado:
Run ID: 1e32a80e6fb045b49bb195999ffcac2b
RMSE: 5.862230443089824
Params: {'custom_metric': 'None', 'early_stopping_rounds': '10', 'learning_rate': '0.21992487468175848', 'max_depth': '15', 'maximize': 'None', 'min_child_weight': '1.0357439143907545', 'num_boost_round': '100', 'objective': 'reg:squarederror', 'reg_alpha': '0.007731550026907306', 'reg_lambda': '0.23377457337376373', 'seed': '42', 'verbose_eval': 'True'}


In [None]:
result = mlflow.register_model(
    model_uri=f"runs:/{best_run.info.run_id}/model",
    name=model_name
)

# **Asignar Alias**

In [16]:
from mlflow import MlflowClient

client = MlflowClient()

In [18]:
model_version = result.version
new_alias = "Champion"

client.set_registered_model_alias(
    name=model_name,
    alias=new_alias,
    version=result.version
)

In [19]:
from datetime import datetime

date = datetime.today()

client.update_model_version(
    name=model_name,
    version=model_version,
    description=f"The model version {model_version} was transitioned to {new_alias} on {date}",
)

<ModelVersion: aliases=[], creation_timestamp=1761102964996, current_stage=None, deployment_job_state=<ModelVersionDeploymentJobState: current_task_name='', job_id='', job_state='DEPLOYMENT_JOB_CONNECTION_STATE_UNSPECIFIED', run_id='', run_state='DEPLOYMENT_JOB_RUN_STATE_UNSPECIFIED'>, description='The model version 1 was transitioned to Champion on 2025-10-21 21:22:15.825153', last_updated_timestamp=1761103336211, metrics=[<Metric: dataset_digest='', dataset_name='', key='best_iteration', model_id='m-0c1aad37d50c416abad8114a634a0ad4', run_id='3a688c9c156648efa181e3064f9d3990', step=0, timestamp=1761102778952, value=80.0>,
 <Metric: dataset_digest='', dataset_name='', key='rmse', model_id='m-0c1aad37d50c416abad8114a634a0ad4', run_id='3a688c9c156648efa181e3064f9d3990', step=0, timestamp=1761102799301, value=5.862230443089824>,
 <Metric: dataset_digest='', dataset_name='', key='stopped_iteration', model_id='m-0c1aad37d50c416abad8114a634a0ad4', run_id='3a688c9c156648efa181e3064f9d3990', s

# **Obteniendo modelos del Model Registry**

In [22]:
import mlflow.pyfunc

model_version_uri = f"models:/{model_name}@Champion"

champion_version = mlflow.pyfunc.load_model(model_version_uri)

Downloading artifacts:   0%|          | 0/8 [00:00<?, ?it/s]

  model.load_model(xgb_model_path)


In [23]:
champion_version.predict(X_val)

MlflowException: Failed to enforce schema of data '<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 44218 stored elements and shape (44218, 449)>
  Coords	Values
  (0, 448)	0.65
  (1, 448)	6.57
  (2, 448)	8.36
  (3, 448)	2.4
  (4, 448)	1.31
  (5, 448)	2.19
  (6, 448)	0.81
  (7, 448)	5.45
  (8, 448)	4.87
  (9, 448)	1.65
  (10, 448)	1.59
  (11, 448)	3.8
  (12, 448)	1.56
  (13, 448)	1.35
  (14, 448)	2.75
  (15, 448)	0.93
  (16, 448)	7.67
  (17, 448)	0.79
  (18, 448)	7.02
  (19, 448)	0.0
  (20, 448)	5.66
  (21, 448)	5.9
  (22, 448)	0.52
  (23, 448)	8.48
  (24, 448)	3.53
  :	:
  (44193, 448)	5.01
  (44194, 448)	0.0
  (44195, 448)	1.16
  (44196, 448)	0.01
  (44197, 448)	12.18
  (44198, 448)	6.05
  (44199, 448)	6.11
  (44200, 448)	12.28
  (44201, 448)	1.76
  (44202, 448)	3.84
  (44203, 448)	0.9
  (44204, 448)	0.97
  (44205, 448)	1.02
  (44206, 448)	6.12
  (44207, 448)	2.02
  (44208, 448)	0.0
  (44209, 448)	3.07
  (44210, 448)	3.7
  (44211, 448)	1.44
  (44212, 448)	4.49
  (44213, 448)	8.23
  (44214, 448)	4.1
  (44215, 448)	4.09
  (44216, 448)	2.25
  (44217, 448)	5.52' with schema '['DOLocationID=1': double (required), 'DOLocationID=10': double (required), 'DOLocationID=100': double (required), 'DOLocationID=101': double (required), 'DOLocationID=102': double (required), 'DOLocationID=106': double (required), 'DOLocationID=107': double (required), 'DOLocationID=108': double (required), 'DOLocationID=11': double (required), 'DOLocationID=111': double (required), 'DOLocationID=112': double (required), 'DOLocationID=113': double (required), 'DOLocationID=114': double (required), 'DOLocationID=116': double (required), 'DOLocationID=117': double (required), 'DOLocationID=119': double (required), 'DOLocationID=12': double (required), 'DOLocationID=120': double (required), 'DOLocationID=121': double (required), 'DOLocationID=122': double (required), 'DOLocationID=123': double (required), 'DOLocationID=124': double (required), 'DOLocationID=125': double (required), 'DOLocationID=126': double (required), 'DOLocationID=127': double (required), 'DOLocationID=128': double (required), 'DOLocationID=129': double (required), 'DOLocationID=13': double (required), 'DOLocationID=130': double (required), 'DOLocationID=131': double (required), 'DOLocationID=132': double (required), 'DOLocationID=133': double (required), 'DOLocationID=134': double (required), 'DOLocationID=135': double (required), 'DOLocationID=136': double (required), 'DOLocationID=137': double (required), 'DOLocationID=138': double (required), 'DOLocationID=139': double (required), 'DOLocationID=14': double (required), 'DOLocationID=140': double (required), 'DOLocationID=141': double (required), 'DOLocationID=142': double (required), 'DOLocationID=143': double (required), 'DOLocationID=144': double (required), 'DOLocationID=145': double (required), 'DOLocationID=146': double (required), 'DOLocationID=147': double (required), 'DOLocationID=148': double (required), 'DOLocationID=149': double (required), 'DOLocationID=15': double (required), 'DOLocationID=150': double (required), 'DOLocationID=151': double (required), 'DOLocationID=152': double (required), 'DOLocationID=153': double (required), 'DOLocationID=154': double (required), 'DOLocationID=155': double (required), 'DOLocationID=157': double (required), 'DOLocationID=158': double (required), 'DOLocationID=159': double (required), 'DOLocationID=16': double (required), 'DOLocationID=160': double (required), 'DOLocationID=161': double (required), 'DOLocationID=162': double (required), 'DOLocationID=163': double (required), 'DOLocationID=164': double (required), 'DOLocationID=165': double (required), 'DOLocationID=166': double (required), 'DOLocationID=167': double (required), 'DOLocationID=168': double (required), 'DOLocationID=169': double (required), 'DOLocationID=17': double (required), 'DOLocationID=170': double (required), 'DOLocationID=171': double (required), 'DOLocationID=173': double (required), 'DOLocationID=174': double (required), 'DOLocationID=175': double (required), 'DOLocationID=177': double (required), 'DOLocationID=178': double (required), 'DOLocationID=179': double (required), 'DOLocationID=18': double (required), 'DOLocationID=180': double (required), 'DOLocationID=181': double (required), 'DOLocationID=182': double (required), 'DOLocationID=183': double (required), 'DOLocationID=184': double (required), 'DOLocationID=185': double (required), 'DOLocationID=186': double (required), 'DOLocationID=188': double (required), 'DOLocationID=189': double (required), 'DOLocationID=19': double (required), 'DOLocationID=190': double (required), 'DOLocationID=191': double (required), 'DOLocationID=192': double (required), 'DOLocationID=193': double (required), 'DOLocationID=194': double (required), 'DOLocationID=195': double (required), 'DOLocationID=196': double (required), 'DOLocationID=197': double (required), 'DOLocationID=198': double (required), 'DOLocationID=20': double (required), 'DOLocationID=200': double (required), 'DOLocationID=201': double (required), 'DOLocationID=202': double (required), 'DOLocationID=203': double (required), 'DOLocationID=205': double (required), 'DOLocationID=207': double (required), 'DOLocationID=208': double (required), 'DOLocationID=209': double (required), 'DOLocationID=21': double (required), 'DOLocationID=210': double (required), 'DOLocationID=211': double (required), 'DOLocationID=212': double (required), 'DOLocationID=213': double (required), 'DOLocationID=215': double (required), 'DOLocationID=216': double (required), 'DOLocationID=217': double (required), 'DOLocationID=218': double (required), 'DOLocationID=219': double (required), 'DOLocationID=22': double (required), 'DOLocationID=220': double (required), 'DOLocationID=222': double (required), 'DOLocationID=223': double (required), 'DOLocationID=224': double (required), 'DOLocationID=225': double (required), 'DOLocationID=226': double (required), 'DOLocationID=227': double (required), 'DOLocationID=228': double (required), 'DOLocationID=229': double (required), 'DOLocationID=23': double (required), 'DOLocationID=230': double (required), 'DOLocationID=231': double (required), 'DOLocationID=232': double (required), 'DOLocationID=233': double (required), 'DOLocationID=234': double (required), 'DOLocationID=235': double (required), 'DOLocationID=236': double (required), 'DOLocationID=237': double (required), 'DOLocationID=238': double (required), 'DOLocationID=239': double (required), 'DOLocationID=24': double (required), 'DOLocationID=240': double (required), 'DOLocationID=241': double (required), 'DOLocationID=242': double (required), 'DOLocationID=243': double (required), 'DOLocationID=244': double (required), 'DOLocationID=245': double (required), 'DOLocationID=246': double (required), 'DOLocationID=247': double (required), 'DOLocationID=248': double (required), 'DOLocationID=249': double (required), 'DOLocationID=25': double (required), 'DOLocationID=250': double (required), 'DOLocationID=252': double (required), 'DOLocationID=253': double (required), 'DOLocationID=254': double (required), 'DOLocationID=255': double (required), 'DOLocationID=256': double (required), 'DOLocationID=257': double (required), 'DOLocationID=258': double (required), 'DOLocationID=259': double (required), 'DOLocationID=26': double (required), 'DOLocationID=260': double (required), 'DOLocationID=261': double (required), 'DOLocationID=262': double (required), 'DOLocationID=263': double (required), 'DOLocationID=264': double (required), 'DOLocationID=265': double (required), 'DOLocationID=27': double (required), 'DOLocationID=28': double (required), 'DOLocationID=29': double (required), 'DOLocationID=3': double (required), 'DOLocationID=31': double (required), 'DOLocationID=32': double (required), 'DOLocationID=33': double (required), 'DOLocationID=34': double (required), 'DOLocationID=35': double (required), 'DOLocationID=36': double (required), 'DOLocationID=37': double (required), 'DOLocationID=38': double (required), 'DOLocationID=39': double (required), 'DOLocationID=4': double (required), 'DOLocationID=40': double (required), 'DOLocationID=41': double (required), 'DOLocationID=42': double (required), 'DOLocationID=43': double (required), 'DOLocationID=45': double (required), 'DOLocationID=46': double (required), 'DOLocationID=47': double (required), 'DOLocationID=48': double (required), 'DOLocationID=49': double (required), 'DOLocationID=50': double (required), 'DOLocationID=51': double (required), 'DOLocationID=52': double (required), 'DOLocationID=53': double (required), 'DOLocationID=54': double (required), 'DOLocationID=55': double (required), 'DOLocationID=56': double (required), 'DOLocationID=57': double (required), 'DOLocationID=58': double (required), 'DOLocationID=60': double (required), 'DOLocationID=61': double (required), 'DOLocationID=62': double (required), 'DOLocationID=63': double (required), 'DOLocationID=64': double (required), 'DOLocationID=65': double (required), 'DOLocationID=66': double (required), 'DOLocationID=67': double (required), 'DOLocationID=68': double (required), 'DOLocationID=69': double (required), 'DOLocationID=7': double (required), 'DOLocationID=70': double (required), 'DOLocationID=71': double (required), 'DOLocationID=72': double (required), 'DOLocationID=73': double (required), 'DOLocationID=74': double (required), 'DOLocationID=75': double (required), 'DOLocationID=76': double (required), 'DOLocationID=77': double (required), 'DOLocationID=78': double (required), 'DOLocationID=79': double (required), 'DOLocationID=8': double (required), 'DOLocationID=80': double (required), 'DOLocationID=81': double (required), 'DOLocationID=82': double (required), 'DOLocationID=83': double (required), 'DOLocationID=85': double (required), 'DOLocationID=86': double (required), 'DOLocationID=87': double (required), 'DOLocationID=88': double (required), 'DOLocationID=89': double (required), 'DOLocationID=9': double (required), 'DOLocationID=90': double (required), 'DOLocationID=91': double (required), 'DOLocationID=92': double (required), 'DOLocationID=93': double (required), 'DOLocationID=94': double (required), 'DOLocationID=95': double (required), 'DOLocationID=96': double (required), 'DOLocationID=97': double (required), 'DOLocationID=98': double (required), 'PULocationID=10': double (required), 'PULocationID=101': double (required), 'PULocationID=102': double (required), 'PULocationID=106': double (required), 'PULocationID=107': double (required), 'PULocationID=108': double (required), 'PULocationID=11': double (required), 'PULocationID=112': double (required), 'PULocationID=116': double (required), 'PULocationID=117': double (required), 'PULocationID=119': double (required), 'PULocationID=120': double (required), 'PULocationID=121': double (required), 'PULocationID=122': double (required), 'PULocationID=123': double (required), 'PULocationID=124': double (required), 'PULocationID=125': double (required), 'PULocationID=126': double (required), 'PULocationID=127': double (required), 'PULocationID=128': double (required), 'PULocationID=129': double (required), 'PULocationID=130': double (required), 'PULocationID=131': double (required), 'PULocationID=132': double (required), 'PULocationID=133': double (required), 'PULocationID=134': double (required), 'PULocationID=135': double (required), 'PULocationID=136': double (required), 'PULocationID=137': double (required), 'PULocationID=138': double (required), 'PULocationID=139': double (required), 'PULocationID=14': double (required), 'PULocationID=140': double (required), 'PULocationID=142': double (required), 'PULocationID=143': double (required), 'PULocationID=145': double (required), 'PULocationID=146': double (required), 'PULocationID=147': double (required), 'PULocationID=149': double (required), 'PULocationID=15': double (required), 'PULocationID=150': double (required), 'PULocationID=151': double (required), 'PULocationID=152': double (required), 'PULocationID=153': double (required), 'PULocationID=154': double (required), 'PULocationID=155': double (required), 'PULocationID=157': double (required), 'PULocationID=159': double (required), 'PULocationID=16': double (required), 'PULocationID=160': double (required), 'PULocationID=161': double (required), 'PULocationID=165': double (required), 'PULocationID=166': double (required), 'PULocationID=167': double (required), 'PULocationID=168': double (required), 'PULocationID=169': double (required), 'PULocationID=17': double (required), 'PULocationID=171': double (required), 'PULocationID=173': double (required), 'PULocationID=174': double (required), 'PULocationID=177': double (required), 'PULocationID=178': double (required), 'PULocationID=179': double (required), 'PULocationID=18': double (required), 'PULocationID=180': double (required), 'PULocationID=181': double (required), 'PULocationID=182': double (required), 'PULocationID=183': double (required), 'PULocationID=184': double (required), 'PULocationID=185': double (required), 'PULocationID=188': double (required), 'PULocationID=189': double (required), 'PULocationID=19': double (required), 'PULocationID=190': double (required), 'PULocationID=191': double (required), 'PULocationID=192': double (required), 'PULocationID=193': double (required), 'PULocationID=194': double (required), 'PULocationID=195': double (required), 'PULocationID=196': double (required), 'PULocationID=197': double (required), 'PULocationID=198': double (required), 'PULocationID=20': double (required), 'PULocationID=200': double (required), 'PULocationID=202': double (required), 'PULocationID=203': double (required), 'PULocationID=205': double (required), 'PULocationID=206': double (required), 'PULocationID=207': double (required), 'PULocationID=208': double (required), 'PULocationID=21': double (required), 'PULocationID=210': double (required), 'PULocationID=211': double (required), 'PULocationID=212': double (required), 'PULocationID=213': double (required), 'PULocationID=215': double (required), 'PULocationID=216': double (required), 'PULocationID=217': double (required), 'PULocationID=218': double (required), 'PULocationID=219': double (required), 'PULocationID=22': double (required), 'PULocationID=220': double (required), 'PULocationID=222': double (required), 'PULocationID=223': double (required), 'PULocationID=225': double (required), 'PULocationID=226': double (required), 'PULocationID=227': double (required), 'PULocationID=228': double (required), 'PULocationID=23': double (required), 'PULocationID=230': double (required), 'PULocationID=232': double (required), 'PULocationID=233': double (required), 'PULocationID=235': double (required), 'PULocationID=236': double (required), 'PULocationID=237': double (required), 'PULocationID=238': double (required), 'PULocationID=24': double (required), 'PULocationID=240': double (required), 'PULocationID=241': double (required), 'PULocationID=242': double (required), 'PULocationID=243': double (required), 'PULocationID=244': double (required), 'PULocationID=246': double (required), 'PULocationID=247': double (required), 'PULocationID=248': double (required), 'PULocationID=25': double (required), 'PULocationID=250': double (required), 'PULocationID=252': double (required), 'PULocationID=253': double (required), 'PULocationID=254': double (required), 'PULocationID=255': double (required), 'PULocationID=256': double (required), 'PULocationID=257': double (required), 'PULocationID=258': double (required), 'PULocationID=259': double (required), 'PULocationID=26': double (required), 'PULocationID=260': double (required), 'PULocationID=262': double (required), 'PULocationID=263': double (required), 'PULocationID=264': double (required), 'PULocationID=265': double (required), 'PULocationID=28': double (required), 'PULocationID=29': double (required), 'PULocationID=3': double (required), 'PULocationID=32': double (required), 'PULocationID=33': double (required), 'PULocationID=34': double (required), 'PULocationID=35': double (required), 'PULocationID=36': double (required), 'PULocationID=37': double (required), 'PULocationID=38': double (required), 'PULocationID=39': double (required), 'PULocationID=40': double (required), 'PULocationID=41': double (required), 'PULocationID=42': double (required), 'PULocationID=43': double (required), 'PULocationID=45': double (required), 'PULocationID=46': double (required), 'PULocationID=47': double (required), 'PULocationID=48': double (required), 'PULocationID=49': double (required), 'PULocationID=51': double (required), 'PULocationID=52': double (required), 'PULocationID=53': double (required), 'PULocationID=54': double (required), 'PULocationID=55': double (required), 'PULocationID=56': double (required), 'PULocationID=57': double (required), 'PULocationID=58': double (required), 'PULocationID=6': double (required), 'PULocationID=60': double (required), 'PULocationID=61': double (required), 'PULocationID=62': double (required), 'PULocationID=63': double (required), 'PULocationID=64': double (required), 'PULocationID=65': double (required), 'PULocationID=66': double (required), 'PULocationID=67': double (required), 'PULocationID=68': double (required), 'PULocationID=69': double (required), 'PULocationID=7': double (required), 'PULocationID=70': double (required), 'PULocationID=71': double (required), 'PULocationID=72': double (required), 'PULocationID=73': double (required), 'PULocationID=74': double (required), 'PULocationID=75': double (required), 'PULocationID=76': double (required), 'PULocationID=77': double (required), 'PULocationID=78': double (required), 'PULocationID=8': double (required), 'PULocationID=80': double (required), 'PULocationID=81': double (required), 'PULocationID=82': double (required), 'PULocationID=83': double (required), 'PULocationID=85': double (required), 'PULocationID=87': double (required), 'PULocationID=89': double (required), 'PULocationID=9': double (required), 'PULocationID=90': double (required), 'PULocationID=91': double (required), 'PULocationID=92': double (required), 'PULocationID=93': double (required), 'PULocationID=94': double (required), 'PULocationID=95': double (required), 'PULocationID=96': double (required), 'PULocationID=97': double (required), 'PULocationID=98': double (required), 'trip_distance': double (required)]'. Error: Expected input to be DataFrame. Found: csr_matrix