In [None]:
import pandas as pd
import numpy as np
# Importa la librería mlflow para el seguimiento de experimentos de machine learning
import mlflow

# uv add xgboost
from xgboost import XGBClassifier
# uv add lightgbm
from lightgbm import LGBMClassifier
# uv add catboost
from catboost import CatBoostClassifier

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import StackingClassifier, BaggingClassifier, RandomForestClassifier, VotingClassifier
from sklearn.dummy import DummyClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer

from sklearn.metrics import accuracy_score, f1_score

In [15]:
# Importa la librería dagshub para integrar el seguimiento de experimentos con DagsHub
import dagshub

# Inicializa la integración con DagsHub, especificando el propietario y nombre del repositorio,
# y habilita la integración con MLflow para registrar experimentos en DagsHub
dagshub.init(
  repo_owner='edynsoncoronado',
  repo_name='ml_supervisado_avanzado',
  mlflow=True
)

# # Inicia una nueva ejecución de MLflow
# with mlflow.start_run():
  # # Registra un parámetro personalizado en el experimento de MLflow
  # mlflow.log_param('parameter name', 'value')
  # # Registra una métrica personalizada en el experimento de MLflow
  # mlflow.log_metric('metric name', 1)

In [None]:
df = pd.read_csv("../data/raw/hotel_bookings.csv")
df.head()

Unnamed: 0,hotel,is_canceled,lead_time,arrival_date_year,arrival_date_month,arrival_date_week_number,arrival_date_day_of_month,stays_in_weekend_nights,stays_in_week_nights,adults,...,deposit_type,agent,company,days_in_waiting_list,customer_type,adr,required_car_parking_spaces,total_of_special_requests,reservation_status,reservation_status_date
0,Resort Hotel,0,342,2015,July,27,1,0,0,2,...,No Deposit,,,0,Transient,0.0,0,0,Check-Out,2015-07-01
1,Resort Hotel,0,737,2015,July,27,1,0,0,2,...,No Deposit,,,0,Transient,0.0,0,0,Check-Out,2015-07-01
2,Resort Hotel,0,7,2015,July,27,1,0,1,1,...,No Deposit,,,0,Transient,75.0,0,0,Check-Out,2015-07-02
3,Resort Hotel,0,13,2015,July,27,1,0,1,1,...,No Deposit,304.0,,0,Transient,75.0,0,0,Check-Out,2015-07-02
4,Resort Hotel,0,14,2015,July,27,1,0,2,2,...,No Deposit,240.0,,0,Transient,98.0,0,1,Check-Out,2015-07-03


In [24]:
FEATURES = ["lead_time", "stays_in_week_nights", "children", "adr", "booking_changes" ]
X, y = df[FEATURES], df["is_canceled"]

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=100)

In [25]:
# Establece la URI de seguimiento de MLflow para que apunte al servidor remoto de DagsHub,
# permitiendo así registrar y visualizar experimentos de MLflow en esa plataforma.
mlflow.set_tracking_uri("https://dagshub.com/edynsoncoronado/ml_supervisado_avanzado.mlflow")

In [26]:
# Crea un nuevo experimento en MLflow llamado "hotel_bookings_experiment"
mlflow.create_experiment("hotel_bookings_experiment2")

'2'

In [28]:
# Establece el experimento actual de MLflow en "hotel_bookings_experiment"
mlflow.set_experiment("hotel_bookings_experiment2")

<Experiment: artifact_location='mlflow-artifacts:/5abc705f82074b9c964637adc38d94a8', creation_time=1753290734582, experiment_id='2', last_update_time=1753290734582, lifecycle_stage='active', name='hotel_bookings_experiment2', tags={}>

# Baseline

In [29]:
# Activa el registro automático de parámetros, métricas y artefactos de modelos compatibles con MLflow
mlflow.autolog()

# Inicia una nueva ejecución de MLflow con el nombre especificado
with mlflow.start_run(run_name="Baseline - Dummy Classifier - Con métricas") as run:

    # Crea una instancia del clasificador Dummy usando la estrategia "most_frequent"
    algorithm = DummyClassifier(strategy="most_frequent")
    # Ajusta el clasificador Dummy con los datos de entrenamiento
    algorithm.fit(X_train, y_train)

    # Realiza predicciones sobre el conjunto de prueba
    predictions = algorithm.predict(X_test)

    # Calcula la métrica de exactitud (accuracy) usando las etiquetas verdaderas y las predichas
    _accuracy_score = accuracy_score(y_test, predictions)
    # Calcula la métrica F1 usando las etiquetas verdaderas y las predichas
    _f1_score = f1_score(y_test, predictions)
    
    # Registra las métricas calculadas en MLflow
    mlflow.log_metrics(
        {
            "accuracy": _accuracy_score,
            "f1": _f1_score
        }
    )

2025/07/23 12:13:43 INFO mlflow.tracking.fluent: Autologging successfully enabled for lightgbm.
2025/07/23 12:13:43 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.
2025/07/23 12:13:43 INFO mlflow.tracking.fluent: Autologging successfully enabled for xgboost.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


🏃 View run Baseline - Dummy Classifier - Con métricas at: https://dagshub.com/edynsoncoronado/ml_supervisado_avanzado.mlflow/#/experiments/2/runs/ba76724c525843ed8e5937dcf6265f55
🧪 View experiment at: https://dagshub.com/edynsoncoronado/ml_supervisado_avanzado.mlflow/#/experiments/2


# Regresión Logística

In [30]:
# Inicia una nueva ejecución de MLflow con el nombre "Regresión logistica"
with mlflow.start_run(run_name="Regresión logistica") as run:

    # Crea una instancia del modelo de Regresión Logística
    algorithm = LogisticRegression()
    # Define un pipeline que primero imputa valores faltantes y luego aplica la regresión logística
    pipeline = Pipeline(
        steps=[
            ("imputer", SimpleImputer(strategy="mean")),  # Imputa valores faltantes usando la media
            ("reg_logistica", algorithm)                  # Aplica el modelo de regresión logística
        ]
    )
    # Ajusta el pipeline con los datos de entrenamiento
    pipeline.fit(X_train, y_train)

    # Realiza predicciones sobre el conjunto de prueba
    predictions = pipeline.predict(X_test)

    # Calcula la métrica de exactitud (accuracy) usando las etiquetas verdaderas y las predichas
    _accuracy_score = accuracy_score(y_test, predictions)
    # Calcula la métrica F1 usando las etiquetas verdaderas y las predichas
    _f1_score = f1_score(y_test, predictions)
    
    # Registra las métricas calculadas en MLflow
    mlflow.log_metrics(
        {
            "accuracy": _accuracy_score,
            "f1": _f1_score
        }
    )





🏃 View run Regresión logistica at: https://dagshub.com/edynsoncoronado/ml_supervisado_avanzado.mlflow/#/experiments/2/runs/addf8a6d74b644b996d7c119ceda53de
🧪 View experiment at: https://dagshub.com/edynsoncoronado/ml_supervisado_avanzado.mlflow/#/experiments/2


# Ensamble 1: Bagging

In [31]:
# Inicia una nueva ejecución de MLflow con el nombre "Bagging"
with mlflow.start_run(run_name="Bagging") as run:

    # Crea una instancia del clasificador Bagging
    algorithm = BaggingClassifier()
    # Define un pipeline que primero imputa valores faltantes y luego aplica el clasificador Bagging
    pipeline = Pipeline(
        steps=[
            ("imputer", SimpleImputer(strategy="mean")),  # Imputa valores faltantes usando la media
            ("bagging", algorithm)                        # Aplica el modelo Bagging
        ]
    )
    # Ajusta el pipeline con los datos de entrenamiento
    pipeline.fit(X_train, y_train)

    # Realiza predicciones sobre el conjunto de prueba
    predictions = pipeline.predict(X_test)

    # Calcula la métrica de exactitud (accuracy) usando las etiquetas verdaderas y las predichas
    _accuracy_score = accuracy_score(y_test, predictions)
    # Calcula la métrica F1 usando las etiquetas verdaderas y las predichas
    _f1_score = f1_score(y_test, predictions)
    
    # Registra las métricas calculadas en MLflow
    mlflow.log_metrics(
        {
            "accuracy": _accuracy_score,
            "f1": _f1_score
        }
    )



🏃 View run Bagging at: https://dagshub.com/edynsoncoronado/ml_supervisado_avanzado.mlflow/#/experiments/2/runs/ef693878b73847d39ef616266e3d281b
🧪 View experiment at: https://dagshub.com/edynsoncoronado/ml_supervisado_avanzado.mlflow/#/experiments/2


# Ensamble 2: Random Forest

In [32]:
# Inicia una nueva ejecución de MLflow con el nombre "Random Forest"
with mlflow.start_run(run_name="Random Forest") as run:

    # Crea una instancia del clasificador Random Forest
    algorithm = RandomForestClassifier()
    # Define un pipeline que primero imputa valores faltantes y luego aplica el clasificador Random Forest
    pipeline = Pipeline(
        steps=[
            ("imputer", SimpleImputer(strategy="mean")),  # Imputa valores faltantes usando la media
            ("rf", algorithm)                             # Aplica el modelo Random Forest
        ]
    )
    # Ajusta el pipeline con los datos de entrenamiento
    pipeline.fit(X_train, y_train)

    # Realiza predicciones sobre el conjunto de prueba
    predictions = pipeline.predict(X_test)

    # Calcula la métrica de exactitud (accuracy) usando las etiquetas verdaderas y las predichas
    _accuracy_score = accuracy_score(y_test, predictions)
    # Calcula la métrica F1 usando las etiquetas verdaderas y las predichas
    _f1_score = f1_score(y_test, predictions)
    
    # Registra las métricas calculadas en MLflow
    mlflow.log_metrics(
        {
            "accuracy": _accuracy_score,
            "f1": _f1_score
        }
    )




🏃 View run Random Forest at: https://dagshub.com/edynsoncoronado/ml_supervisado_avanzado.mlflow/#/experiments/2/runs/31f6641bb4cd4f73926d204beb520399
🧪 View experiment at: https://dagshub.com/edynsoncoronado/ml_supervisado_avanzado.mlflow/#/experiments/2


# Ensamble 3: XGboost

In [33]:
# Inicia una nueva ejecución de MLflow con el nombre "XGboost"
with mlflow.start_run(run_name="XGboost") as run:

    # Crea una instancia del clasificador XGBoost con profundidad máxima de 5 y 101 árboles
    algorithm = XGBClassifier(
        max_depth=5,         # Establece la profundidad máxima de cada árbol en 5
        n_estimators=101     # Establece el número de árboles en el modelo en 101
    )
    # Define un pipeline que primero imputa valores faltantes y luego aplica el clasificador XGBoost
    pipeline = Pipeline(
        steps=[
            ("imputer", SimpleImputer(strategy="mean")),  # Imputa valores faltantes usando la media
            ("xgb", algorithm)                            # Aplica el modelo XGBoost
        ]
    )
    # Ajusta el pipeline con los datos de entrenamiento
    pipeline.fit(X_train, y_train)

    # Realiza predicciones sobre el conjunto de prueba
    predictions = pipeline.predict(X_test)

    # Calcula la métrica de exactitud (accuracy) usando las etiquetas verdaderas y las predichas
    _accuracy_score = accuracy_score(y_test, predictions)
    # Calcula la métrica F1 usando las etiquetas verdaderas y las predichas
    _f1_score = f1_score(y_test, predictions)
    
    # Registra las métricas calculadas en MLflow
    mlflow.log_metrics(
        {
            "accuracy": _accuracy_score,
            "f1": _f1_score
        }
    )



🏃 View run XGboost at: https://dagshub.com/edynsoncoronado/ml_supervisado_avanzado.mlflow/#/experiments/2/runs/25051685006c4e5d8de8071c6f7c464c
🧪 View experiment at: https://dagshub.com/edynsoncoronado/ml_supervisado_avanzado.mlflow/#/experiments/2


# Ensamble 4: LGBM

In [34]:
# Inicia una nueva ejecución de MLflow con el nombre "LGBM"
with mlflow.start_run(run_name="LGBM") as run:
    
    # Crea una instancia del clasificador LightGBM
    algorithm = LGBMClassifier()
    
    # Define un pipeline que primero imputa valores faltantes y luego aplica el clasificador LightGBM
    pipeline = Pipeline(
        steps=[
            ("imputer", SimpleImputer(strategy="mean")),  # Imputa valores faltantes usando la media
            ("lgbm", algorithm)                           # Aplica el modelo LightGBM
        ]
    )
    
    # Ajusta el pipeline con los datos de entrenamiento
    pipeline.fit(X_train, y_train)
    
    # Realiza predicciones sobre el conjunto de prueba
    predictions = pipeline.predict(X_test)
    
    # Calcula la métrica de exactitud (accuracy) usando las etiquetas verdaderas y las predichas
    _accuracy_score = accuracy_score(y_test, predictions)
    # Calcula la métrica F1 usando las etiquetas verdaderas y las predichas
    _f1_score = f1_score(y_test, predictions)
    
    # Registra las métricas calculadas en MLflow
    mlflow.log_metrics(
        {
            "accuracy": _accuracy_score,
            "f1": _f1_score
        }   
    )



[LightGBM] [Info] Number of positive: 33167, number of negative: 56375
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001146 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 555
[LightGBM] [Info] Number of data points in the train set: 89542, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.370407 -> initscore=-0.530470
[LightGBM] [Info] Start training from score -0.530470




🏃 View run LGBM at: https://dagshub.com/edynsoncoronado/ml_supervisado_avanzado.mlflow/#/experiments/2/runs/061bd5c6ad474c51b278e71c570dae43
🧪 View experiment at: https://dagshub.com/edynsoncoronado/ml_supervisado_avanzado.mlflow/#/experiments/2


# Ensamble 5: Catboost

In [35]:
# Inicia una nueva ejecución de MLflow con el nombre "CatBoost"
with mlflow.start_run(run_name="CatBoost") as run:

    # Crea una instancia del clasificador CatBoost
    algorithm = CatBoostClassifier()
    
    # Define un pipeline que primero imputa valores faltantes y luego aplica el clasificador CatBoost
    pipeline = Pipeline(
        steps=[
            ("imputer", SimpleImputer(strategy="mean")),  # Imputa valores faltantes usando la media
            ("catboost", algorithm)                       # Aplica el modelo CatBoost
        ]
    )
    
    # Ajusta el pipeline con los datos de entrenamiento
    pipeline.fit(X_train, y_train)

    # Realiza predicciones sobre el conjunto de prueba
    predictions = pipeline.predict(X_test)

    # Calcula la métrica de exactitud (accuracy) usando las etiquetas verdaderas y las predichas
    _accuracy_score = accuracy_score(y_test, predictions)
    # Calcula la métrica F1 usando las etiquetas verdaderas y las predichas
    _f1_score = f1_score(y_test, predictions)
    
    # Registra las métricas calculadas en MLflow
    mlflow.log_metrics(
        {
            "accuracy": _accuracy_score,
            "f1": _f1_score
        }        
    )



Learning rate set to 0.070218
0:	learn: 0.6738091	total: 5.81ms	remaining: 5.81s
1:	learn: 0.6577196	total: 10.5ms	remaining: 5.24s
2:	learn: 0.6435688	total: 17.5ms	remaining: 5.81s
3:	learn: 0.6321461	total: 23.2ms	remaining: 5.79s
4:	learn: 0.6222567	total: 28.8ms	remaining: 5.73s
5:	learn: 0.6132893	total: 34.8ms	remaining: 5.77s
6:	learn: 0.6055982	total: 40.4ms	remaining: 5.73s
7:	learn: 0.5995170	total: 46.9ms	remaining: 5.82s
8:	learn: 0.5937286	total: 53.3ms	remaining: 5.87s
9:	learn: 0.5890019	total: 59.6ms	remaining: 5.91s
10:	learn: 0.5846239	total: 66.6ms	remaining: 5.99s
11:	learn: 0.5811667	total: 77.5ms	remaining: 6.38s
12:	learn: 0.5778450	total: 90.7ms	remaining: 6.89s
13:	learn: 0.5752667	total: 101ms	remaining: 7.13s
14:	learn: 0.5728444	total: 108ms	remaining: 7.08s
15:	learn: 0.5706692	total: 117ms	remaining: 7.18s
16:	learn: 0.5687932	total: 122ms	remaining: 7.08s
17:	learn: 0.5673718	total: 129ms	remaining: 7.03s
18:	learn: 0.5658373	total: 135ms	remaining: 6.97



🏃 View run CatBoost at: https://dagshub.com/edynsoncoronado/ml_supervisado_avanzado.mlflow/#/experiments/2/runs/1486aaeb359445119f072bda00748bb5
🧪 View experiment at: https://dagshub.com/edynsoncoronado/ml_supervisado_avanzado.mlflow/#/experiments/2


# Ensamble 6: Voting

In [36]:
with mlflow.start_run(run_name="Ensamble de Votos") as run:
    
    # Crea una instancia del clasificador CatBoost
    algorithm1 = CatBoostClassifier()
    # Crea una instancia del clasificador Random Forest
    algorithm2 = RandomForestClassifier()
    # Crea una instancia del clasificador LightGBM
    algorithm3 = LGBMClassifier()

    # Define el clasificador de ensamble Voting, usando los tres modelos anteriores
    voting_clf = VotingClassifier(
        estimators=[
            ("catboost", algorithm1),  # Primer modelo base: CatBoost
            ("rf", algorithm2),        # Segundo modelo base: Random Forest
            ("lgbm", algorithm3),      # Tercer modelo base: LightGBM
        ],
        voting="hard"                 # Usa votación mayoritaria (hard voting)
    )
    # Define un pipeline que primero imputa valores faltantes y luego aplica el clasificador Voting
    pipeline = Pipeline(
        steps=[
            ("imputer", SimpleImputer(strategy="mean")),  # Imputa valores faltantes usando la media
            ("voting", voting_clf)                        # Aplica el modelo Voting
        ]
    )
    # Ajusta el pipeline con los datos de entrenamiento
    pipeline.fit(X_train, y_train)

    # Realiza predicciones sobre el conjunto de prueba
    predictions = pipeline.predict(X_test)

    # Calcula la métrica de exactitud (accuracy) usando las etiquetas verdaderas y las predichas
    _accuracy_score = accuracy_score(y_test, predictions)
    # Calcula la métrica F1 usando las etiquetas verdaderas y las predichas
    _f1_score = f1_score(y_test, predictions)
    
    # Registra las métricas calculadas en MLflow
    mlflow.log_metrics(
        {
            "accuracy": _accuracy_score,
            "f1": _f1_score
        }
    )



Learning rate set to 0.070218
0:	learn: 0.6738091	total: 6.13ms	remaining: 6.13s
1:	learn: 0.6577196	total: 11.7ms	remaining: 5.85s
2:	learn: 0.6435688	total: 18.3ms	remaining: 6.09s
3:	learn: 0.6321461	total: 24ms	remaining: 5.96s
4:	learn: 0.6222567	total: 29.1ms	remaining: 5.79s
5:	learn: 0.6132893	total: 35.1ms	remaining: 5.82s
6:	learn: 0.6055982	total: 40.1ms	remaining: 5.69s
7:	learn: 0.5995170	total: 45.2ms	remaining: 5.61s
8:	learn: 0.5937286	total: 52.5ms	remaining: 5.78s
9:	learn: 0.5890019	total: 58.8ms	remaining: 5.82s
10:	learn: 0.5846239	total: 63.7ms	remaining: 5.73s
11:	learn: 0.5811667	total: 70.4ms	remaining: 5.8s
12:	learn: 0.5778450	total: 76.1ms	remaining: 5.78s
13:	learn: 0.5752667	total: 81.7ms	remaining: 5.76s
14:	learn: 0.5728444	total: 87.2ms	remaining: 5.73s
15:	learn: 0.5706692	total: 92.7ms	remaining: 5.7s
16:	learn: 0.5687932	total: 99.6ms	remaining: 5.76s
17:	learn: 0.5673718	total: 106ms	remaining: 5.76s
18:	learn: 0.5658373	total: 111ms	remaining: 5.73



🏃 View run Ensamble de Votos at: https://dagshub.com/edynsoncoronado/ml_supervisado_avanzado.mlflow/#/experiments/2/runs/ca09aa4eb4e54227942e5546eea052f9
🧪 View experiment at: https://dagshub.com/edynsoncoronado/ml_supervisado_avanzado.mlflow/#/experiments/2


# Ensamble 7: Stacking

In [37]:
# Inicia una nueva ejecución de MLflow con el nombre "Ensamble de Pilas"
with mlflow.start_run(run_name="Ensamble de Pilas") as run:
    
    # Crea una instancia del clasificador CatBoost
    algorithm1 = CatBoostClassifier()
    # Crea una instancia del clasificador XGBoost
    algorithm2 = XGBClassifier()
    # Crea una instancia del clasificador LightGBM
    algorithm3 = LGBMClassifier()

    # Define el clasificador de ensamble Stacking, usando los tres modelos anteriores como base
    # y RandomForestClassifier como estimador final
    stacking_clf = StackingClassifier(
        estimators=[
            ("catboost", algorithm1),  # Primer modelo base: CatBoost
            ("xgb", algorithm2),       # Segundo modelo base: XGBoost
            ("lgbm", algorithm3),      # Tercer modelo base: LightGBM
        ],
        final_estimator=RandomForestClassifier()  # Modelo final: Random Forest
    )
    # Define un pipeline que primero imputa valores faltantes y luego aplica el clasificador Stacking
    pipeline = Pipeline(
        steps=[
            ("imputer", SimpleImputer(strategy="mean")),  # Imputa valores faltantes usando la media
            ("staking", stacking_clf)                     # Aplica el modelo Stacking
        ]
    )
    # Ajusta el pipeline con los datos de entrenamiento
    pipeline.fit(X_train, y_train)

    # Realiza predicciones sobre el conjunto de prueba
    predictions = pipeline.predict(X_test)

    # Calcula la métrica de exactitud (accuracy) usando las etiquetas verdaderas y las predichas
    _accuracy_score = accuracy_score(y_test, predictions)
    # Calcula la métrica F1 usando las etiquetas verdaderas y las predichas
    _f1_score = f1_score(y_test, predictions)
    
    # Registra las métricas calculadas en MLflow
    mlflow.log_metrics(
        {
            "accuracy": _accuracy_score,
            "f1": _f1_score
        }   
    )



Learning rate set to 0.070218
0:	learn: 0.6738091	total: 5.31ms	remaining: 5.3s
1:	learn: 0.6577196	total: 11.7ms	remaining: 5.86s
2:	learn: 0.6435688	total: 17.7ms	remaining: 5.88s
3:	learn: 0.6321461	total: 25ms	remaining: 6.22s
4:	learn: 0.6222567	total: 31.7ms	remaining: 6.31s
5:	learn: 0.6132893	total: 37.4ms	remaining: 6.2s
6:	learn: 0.6055982	total: 45.2ms	remaining: 6.41s
7:	learn: 0.5995170	total: 50.9ms	remaining: 6.31s
8:	learn: 0.5937286	total: 58.9ms	remaining: 6.49s
9:	learn: 0.5890019	total: 66.6ms	remaining: 6.59s
10:	learn: 0.5846239	total: 71.6ms	remaining: 6.44s
11:	learn: 0.5811667	total: 78.5ms	remaining: 6.47s
12:	learn: 0.5778450	total: 84ms	remaining: 6.38s
13:	learn: 0.5752667	total: 91ms	remaining: 6.41s
14:	learn: 0.5728444	total: 97.1ms	remaining: 6.38s
15:	learn: 0.5706692	total: 103ms	remaining: 6.32s
16:	learn: 0.5687932	total: 110ms	remaining: 6.37s
17:	learn: 0.5673718	total: 116ms	remaining: 6.31s
18:	learn: 0.5658373	total: 124ms	remaining: 6.4s
19:	l



[LightGBM] [Info] Number of positive: 26534, number of negative: 45100
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000930 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 554
[LightGBM] [Info] Number of data points in the train set: 71634, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.370411 -> initscore=-0.530455
[LightGBM] [Info] Start training from score -0.530455
[LightGBM] [Info] Number of positive: 26534, number of negative: 45100
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000954 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 555
[LightGBM] [Info] Number of data points in the train set: 71634, number of used features: 5
[LightGBM] [Info] [bin



[LightGBM] [Info] Number of positive: 26534, number of negative: 45100
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000904 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 554
[LightGBM] [Info] Number of data points in the train set: 71634, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.370411 -> initscore=-0.530455
[LightGBM] [Info] Start training from score -0.530455




🏃 View run Ensamble de Pilas at: https://dagshub.com/edynsoncoronado/ml_supervisado_avanzado.mlflow/#/experiments/2/runs/6e8d54ad44fd40f1a93e11652fa905c0
🧪 View experiment at: https://dagshub.com/edynsoncoronado/ml_supervisado_avanzado.mlflow/#/experiments/2
