# Clase 9: Modelamiento
## DP-100 Azure Data Scientist / DSRP
## Clase 23 Septiembre

### Dataset Utilizado: Hotel Booking

https://www.kaggle.com/code/touba7/hotel-booking

In [1]:
import pandas as pd
import mlflow


from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.dummy import DummyClassifier
from sklearn.neighbors import KNeighborsClassifier

from sklearn.metrics import (
    accuracy_score, 
    precision_score, 
    recall_score, 
    f1_score, 
    classification_report, 
    roc_auc_score, 
    confusion_matrix
)

from xgboost import XGBClassifier
from lightgbm import LGBMClassifier

from hyperopt import fmin, hp
from loguru import logger



# CREDENCIALES AZURE
ml_client = MLClient.from_config(credential=DefaultAzureCredential())
# METADATA ASSET
data_asset = ml_client.data.get("gold-booking-dsrp", version="2")


Found the config file in: /config.json


In [2]:
modeling_dataframe = pd.read_csv(data_asset.path)

## Preparación de los datos

In [3]:
TARGET_COLUMN = "is_canceled"

X = modeling_dataframe.drop(TARGET_COLUMN, axis=1)
y = modeling_dataframe[TARGET_COLUMN]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1000)

In [4]:
X_train

Unnamed: 0,lead_time,arrival_date_week_number,arrival_date_day_of_month,stays_in_weekend_nights,stays_in_week_nights,adults,children,babies,is_repeated_guest,previous_cancellations,...,reserved_room_type_H,reserved_room_type_L,reserved_room_type_P,deposit_type_No Deposit,deposit_type_Non Refund,deposit_type_Refundable,customer_type_Contract,customer_type_Group,customer_type_Transient,customer_type_Transient-Party
89468,189.0,21.0,19.0,0.0,3.0,2.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
54838,386.0,31.0,28.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
62385,552.0,2.0,12.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
101395,18.0,46.0,7.0,1.0,3.0,2.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
103342,87.0,51.0,17.0,2.0,1.0,2.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
113915,51.0,23.0,9.0,2.0,3.0,2.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
3776,5.0,2.0,4.0,1.0,3.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
71751,80.0,28.0,11.0,0.0,4.0,3.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
70231,139.0,24.0,12.0,1.0,1.0,2.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0


In [5]:
X_test

Unnamed: 0,lead_time,arrival_date_week_number,arrival_date_day_of_month,stays_in_weekend_nights,stays_in_week_nights,adults,children,babies,is_repeated_guest,previous_cancellations,...,reserved_room_type_H,reserved_room_type_L,reserved_room_type_P,deposit_type_No Deposit,deposit_type_Non Refund,deposit_type_Refundable,customer_type_Contract,customer_type_Group,customer_type_Transient,customer_type_Transient-Party
110695,221.0,17.0,28.0,1.0,2.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
74453,304.0,34.0,17.0,1.0,1.0,2.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0
94193,301.0,31.0,30.0,1.0,1.0,2.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
1387,30.0,35.0,28.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
7654,29.0,34.0,18.0,2.0,5.0,2.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
101240,1.0,45.0,5.0,2.0,2.0,2.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
54272,49.0,29.0,11.0,1.0,0.0,2.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
27544,70.0,35.0,26.0,2.0,5.0,2.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
13809,7.0,38.0,13.0,0.0,2.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0


# Modelamiento

In [10]:
import mlflow
from azureml.core import Workspace

EXPERIMENT_NAME = "DSRP - Booking Cancellation Prediction 2024"

ws = Workspace.from_config()
mlflow.set_tracking_uri(ws.get_mlflow_tracking_uri())

mlflow.create_experiment(EXPERIMENT_NAME)
mlflow.set_experiment(EXPERIMENT_NAME)

<Experiment: artifact_location='', creation_time=1727144276340, experiment_id='198d9690-6de9-4f7f-bde2-99cc70cb1836', last_update_time=None, lifecycle_stage='active', name='DSRP - Booking Cancellation Prediction 2024', tags={}>

# Baseline

In [14]:
# 1. INSTANCIAR EL ALGORITMO
# 2. EJECUTAR EL METODO TRAIN
# 3. EJECUTAR EL METODO PREDICT
# 4. CALCULAR METRICAS EN EL DATASET DE SET

with mlflow.start_run(run_name="Dummy Baseline"):

    dummy_params = {
        "strategy":"uniform"
    }

    dummy_classifier = DummyClassifier(**dummy_params)
    dummy_classifier.fit(X_train, y_train)

    mlflow.log_params(dummy_params)

    dummy_predictions = dummy_classifier.predict(X_test)

    metrics = {
        "accuracy_score": accuracy_score(y_test, dummy_predictions),
        "recall_score": recall_score(y_test, dummy_predictions),
        "precision_score":precision_score(y_test, dummy_predictions),
        "f1_score": f1_score(y_test, dummy_predictions)
    }

    mlflow.log_metrics(
        metrics
    )

    logger.info(f"Accuracy: {metrics['accuracy_score']}")
    logger.info(f"Recall: {metrics['recall_score']}")
    logger.info(f"Precision: {metrics['precision_scoe']}")
    logger.info(f"F1-Score: {metrics['f1_score']}")

[32m2024-09-24 02:27:44.811[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m30[0m - [1mAccuracy: 0.5039366781137449[0m
[32m2024-09-24 02:27:44.812[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m31[0m - [1mRecall: 0.5009078529278257[0m
[32m2024-09-24 02:27:44.813[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m32[0m - [1mPrecision: 0.37214400134895875[0m
[32m2024-09-24 02:27:44.814[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m33[0m - [1mF1-Score: 0.42703042615972525[0m
2024/09/24 02:27:44 INFO mlflow.tracking._tracking_service.client: 🏃 View run Dummy Baseline at: https://eastus.api.azureml.ms/mlflow/v2.0/subscriptions/0f61d6bf-ab3d-4df7-a666-edaf42eff57c/resourceGroups/aml-course-dp100-2024/providers/Microsoft.MachineLearningServices/workspaces/dsrp-aml-dp100/#/experiments/198d9690-6de9-4f7f-bde2-99cc70cb1836/runs/50eb643a-09b7-4cf0-b638-b4a25e21a204.
2024/09/24 02:27:44 INFO mlflow.tracking._tr

# Regresión Logística

In [15]:

with mlflow.start_run(run_name="Regresión Logística"):

    logreg_classifier = LogisticRegression()
    logreg_classifier.fit(X_train, y_train)

    logreg_predictions = logreg_classifier.predict(X_test)

    metrics = {
        "accuracy_score": accuracy_score(y_test, logreg_predictions),
        "recall_score": recall_score(y_test, logreg_predictions),
        "precision_score":precision_score(y_test, logreg_predictions),
        "f1_score": f1_score(y_test, logreg_predictions)
    }

    mlflow.log_metrics(
        metrics
    )

    logger.info(f"Accuracy: {accuracy_score(y_test, logreg_predictions)}")
    logger.info(f"Recall: {recall_score(y_test, logreg_predictions)}")
    logger.info(f"Precision: {precision_score(y_test, logreg_predictions)}")
    logger.info(f"F1-Score: {f1_score(y_test, logreg_predictions)}")
    print(confusion_matrix(y_test, logreg_predictions))
    print(
        classification_report(y_test, logreg_predictions)
    )

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
[32m2024-09-24 02:30:40.411[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m19[0m - [1mAccuracy: 0.8043387218359996[0m
[32m2024-09-24 02:30:40.427[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m20[0m - [1mRecall: 0.6129142078983205[0m
[32m2024-09-24 02:30:40.442[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m21[0m - [1mPrecision: 0.8107175022515761[0m
[32m2024-09-24 02:30:40.457[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m22[0m - [1mF1-Score: 0.69807418896213[0m
2024/09/24 02:30:40 INFO mlflow.tracking._tracking_service.clien

[[13805  1261]
 [ 3411  5401]]
              precision    recall  f1-score   support

         0.0       0.80      0.92      0.86     15066
         1.0       0.81      0.61      0.70      8812

    accuracy                           0.80     23878
   macro avg       0.81      0.76      0.78     23878
weighted avg       0.81      0.80      0.80     23878



## Vecino más cercano - KNN

In [16]:

with mlflow.start_run(run_name="KNN"): 

    knn_params = {
        "n_neighbors": 5
    }

    knn_classifier = KNeighborsClassifier(**knn_params)
    knn_classifier.fit(X_train, y_train)

    knn_predictions = knn_classifier.predict(X_test)
    
    mlflow.log_params(knn_params)

    metrics = {
        "accuracy_score": accuracy_score(y_test, knn_predictions),
        "recall_score": recall_score(y_test, knn_predictions),
        "precision_score":precision_score(y_test, knn_predictions),
        "f1_score": f1_score(y_test, knn_predictions)
    }

    mlflow.log_metrics(
        metrics
    )

    logger.info(f"Accuracy: {accuracy_score(y_test, knn_predictions)}")
    logger.info(f"Recall: {recall_score(y_test, knn_predictions)}")
    logger.info(f"Precision: {precision_score(y_test, knn_predictions)}")
    logger.info(f"F1-Score: {f1_score(y_test, knn_predictions)}")
    print(confusion_matrix(y_test, knn_predictions))
    print(
        classification_report(y_test, knn_predictions)
    )

[32m2024-09-24 02:33:09.364[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m25[0m - [1mAccuracy: 0.7725940195996315[0m
[32m2024-09-24 02:33:09.380[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m26[0m - [1mRecall: 0.6334543803903767[0m
[32m2024-09-24 02:33:09.396[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m27[0m - [1mPrecision: 0.717296324852223[0m
[32m2024-09-24 02:33:09.412[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m28[0m - [1mF1-Score: 0.672773291551163[0m
2024/09/24 02:33:09 INFO mlflow.tracking._tracking_service.client: 🏃 View run KNN at: https://eastus.api.azureml.ms/mlflow/v2.0/subscriptions/0f61d6bf-ab3d-4df7-a666-edaf42eff57c/resourceGroups/aml-course-dp100-2024/providers/Microsoft.MachineLearningServices/workspaces/dsrp-aml-dp100/#/experiments/198d9690-6de9-4f7f-bde2-99cc70cb1836/runs/33d8ba11-7402-4ddb-8e23-c8571b04843f.
2024/09/24 02:33:09 INFO mlflow.tracking._tracking_service.

[[12866  2200]
 [ 3230  5582]]
              precision    recall  f1-score   support

         0.0       0.80      0.85      0.83     15066
         1.0       0.72      0.63      0.67      8812

    accuracy                           0.77     23878
   macro avg       0.76      0.74      0.75     23878
weighted avg       0.77      0.77      0.77     23878



# Random Forest

In [17]:

with mlflow.start_run(run_name="Random Forest"): 

    rf_params = {
        "n_estimators": 300,
        "max_depth": 3

    }
 
    rf_classifier = RandomForestClassifier(
       **rf_params
    )
    rf_classifier.fit(X_train, y_train)

    mlflow.log_params(rf_params)

    rf_predictions = rf_classifier.predict(X_test)

    metrics = {
        "accuracy_score": accuracy_score(y_test, rf_predictions),
        "recall_score": recall_score(y_test, rf_predictions),
        "precision_score":precision_score(y_test, rf_predictions),
        "f1_score": f1_score(y_test, rf_predictions)
    }

    mlflow.log_metrics(
        metrics
    )

    logger.info(f"Accuracy: {accuracy_score(y_test, rf_predictions)}")
    logger.info(f"Recall: {recall_score(y_test, rf_predictions)}")
    logger.info(f"Precision: {precision_score(y_test, rf_predictions)}")
    logger.info(f"F1-Score: {f1_score(y_test, rf_predictions)}")
    print(confusion_matrix(y_test, rf_predictions))
    print(
        classification_report(y_test, rf_predictions)
    )

[32m2024-09-24 02:40:28.509[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m29[0m - [1mAccuracy: 0.7630454811960801[0m
[32m2024-09-24 02:40:28.534[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m30[0m - [1mRecall: 0.3600771674988652[0m
[32m2024-09-24 02:40:28.549[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m31[0m - [1mPrecision: 0.9940476190476191[0m
[32m2024-09-24 02:40:28.567[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m32[0m - [1mF1-Score: 0.5286571142952349[0m
2024/09/24 02:40:28 INFO mlflow.tracking._tracking_service.client: 🏃 View run Random Forest at: https://eastus.api.azureml.ms/mlflow/v2.0/subscriptions/0f61d6bf-ab3d-4df7-a666-edaf42eff57c/resourceGroups/aml-course-dp100-2024/providers/Microsoft.MachineLearningServices/workspaces/dsrp-aml-dp100/#/experiments/198d9690-6de9-4f7f-bde2-99cc70cb1836/runs/c831df97-076f-4888-b8c4-2c6dd89c49d6.
2024/09/24 02:40:28 INFO mlflow.tracking._track

[[15047    19]
 [ 5639  3173]]
              precision    recall  f1-score   support

         0.0       0.73      1.00      0.84     15066
         1.0       0.99      0.36      0.53      8812

    accuracy                           0.76     23878
   macro avg       0.86      0.68      0.69     23878
weighted avg       0.83      0.76      0.73     23878



# Gradient Boosting Machine

In [18]:

with mlflow.start_run(run_name="Gradient Boosting Machine"): 

    gbm_classifier = GradientBoostingClassifier()
    gbm_classifier.fit(X_train, y_train)

    gbm_predictions = gbm_classifier.predict(X_test)

    metrics = {
        "accuracy_score": accuracy_score(y_test, gbm_predictions),
        "recall_score": recall_score(y_test, gbm_predictions),
        "precision_score":precision_score(y_test, gbm_predictions),
        "f1_score": f1_score(y_test, gbm_predictions)
    }

    mlflow.log_metrics(
        metrics
    )

    logger.info(f"Accuracy: {accuracy_score(y_test, gbm_predictions)}")
    logger.info(f"Recall: {recall_score(y_test, gbm_predictions)}")
    logger.info(f"Precision: {precision_score(y_test, gbm_predictions)}")
    logger.info(f"F1-Score: {f1_score(y_test, gbm_predictions)}")
    print(confusion_matrix(y_test, gbm_predictions))
    print(
        classification_report(y_test, gbm_predictions)
    )

[32m2024-09-24 02:40:53.891[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m19[0m - [1mAccuracy: 0.8215093391406315[0m
[32m2024-09-24 02:40:53.909[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m20[0m - [1mRecall: 0.6235814798002723[0m
[32m2024-09-24 02:40:53.927[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m21[0m - [1mPrecision: 0.8532608695652174[0m
[32m2024-09-24 02:40:53.944[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m22[0m - [1mF1-Score: 0.7205612378704432[0m
2024/09/24 02:40:54 INFO mlflow.tracking._tracking_service.client: 🏃 View run Gradient Boosting Machine at: https://eastus.api.azureml.ms/mlflow/v2.0/subscriptions/0f61d6bf-ab3d-4df7-a666-edaf42eff57c/resourceGroups/aml-course-dp100-2024/providers/Microsoft.MachineLearningServices/workspaces/dsrp-aml-dp100/#/experiments/198d9690-6de9-4f7f-bde2-99cc70cb1836/runs/bfa3ba17-dab6-498e-90e0-4293a2a53894.
2024/09/24 02:40:54 INFO mlflow.tra

[[14121   945]
 [ 3317  5495]]
              precision    recall  f1-score   support

         0.0       0.81      0.94      0.87     15066
         1.0       0.85      0.62      0.72      8812

    accuracy                           0.82     23878
   macro avg       0.83      0.78      0.79     23878
weighted avg       0.83      0.82      0.81     23878



# XGboost

In [19]:


with mlflow.start_run(run_name="XGboost"): 
    xgb_classifier = XGBClassifier()
    xgb_classifier.fit(X_train, y_train)

    xgb_predictions = xgb_classifier.predict(X_test)

    metrics = {
        "accuracy_score": accuracy_score(y_test, xgb_predictions),
        "recall_score": recall_score(y_test, xgb_predictions),
        "precision_score":precision_score(y_test, xgb_predictions),
        "f1_score": f1_score(y_test, xgb_predictions)
    }

    mlflow.log_metrics(
        metrics
    )

    logger.info(f"Accuracy: {accuracy_score(y_test, xgb_predictions)}")
    logger.info(f"Recall: {recall_score(y_test, xgb_predictions)}")
    logger.info(f"Precision: {precision_score(y_test, xgb_predictions)}")
    logger.info(f"F1-Score: {f1_score(y_test, xgb_predictions)}")
    print(confusion_matrix(y_test, xgb_predictions))
    print(
        classification_report(y_test, xgb_predictions)
    )

[32m2024-09-24 02:41:06.059[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m18[0m - [1mAccuracy: 0.8512019432113243[0m
[32m2024-09-24 02:41:06.075[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m19[0m - [1mRecall: 0.7291193826600091[0m
[32m2024-09-24 02:41:06.108[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m20[0m - [1mPrecision: 0.8463970491371361[0m
[32m2024-09-24 02:41:06.139[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m21[0m - [1mF1-Score: 0.7833932817167591[0m
2024/09/24 02:41:06 INFO mlflow.tracking._tracking_service.client: 🏃 View run XGboost at: https://eastus.api.azureml.ms/mlflow/v2.0/subscriptions/0f61d6bf-ab3d-4df7-a666-edaf42eff57c/resourceGroups/aml-course-dp100-2024/providers/Microsoft.MachineLearningServices/workspaces/dsrp-aml-dp100/#/experiments/198d9690-6de9-4f7f-bde2-99cc70cb1836/runs/5f3c1dce-743e-4c5a-8b0b-b6b2073d977a.
2024/09/24 02:41:06 INFO mlflow.tracking._tracking_se

[[13900  1166]
 [ 2387  6425]]
              precision    recall  f1-score   support

         0.0       0.85      0.92      0.89     15066
         1.0       0.85      0.73      0.78      8812

    accuracy                           0.85     23878
   macro avg       0.85      0.83      0.84     23878
weighted avg       0.85      0.85      0.85     23878



## LightGBM

In [20]:
with mlflow.start_run(run_name="LGBM"): 

    lgbm_classifier = LGBMClassifier()
    lgbm_classifier.fit(X_train, y_train)

    lgbm_predictions = lgbm_classifier.predict(X_test)

    metrics = {
        "accuracy_score": accuracy_score(y_test, lgbm_predictions),
        "recall_score": recall_score(y_test, lgbm_predictions),
        "precision_score":precision_score(y_test, lgbm_predictions),
        "f1_score": f1_score(y_test, lgbm_predictions)
    }

    mlflow.log_metrics(
        metrics
    )

    logger.info(f"Accuracy: {accuracy_score(y_test, lgbm_predictions)}")
    logger.info(f"Recall: {recall_score(y_test, lgbm_predictions)}")
    logger.info(f"Precision: {precision_score(y_test, lgbm_predictions)}")
    logger.info(f"F1-Score: {f1_score(y_test, lgbm_predictions)}")
    print(confusion_matrix(y_test, lgbm_predictions))
    print(
        classification_report(y_test, lgbm_predictions)
    )

[LightGBM] [Info] Number of positive: 35412, number of negative: 60100
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.041556 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 936
[LightGBM] [Info] Number of data points in the train set: 95512, number of used features: 64
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.370760 -> initscore=-0.528959
[LightGBM] [Info] Start training from score -0.528959
[[14025  1041]
 [ 2613  6199]]
              precision    recall  f1-score   support

         0.0       0.84      0.93      0.88     15066
         1.0       0.86      0.70      0.77      8812

    accuracy                           0.85     23878
   macro avg       0.85      0.82      0.83     23878
weighted avg       0.85      0.85      0.84     23878



[32m2024-09-24 02:41:09.779[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m19[0m - [1mAccuracy: 0.8469721082167686[0m
[32m2024-09-24 02:41:09.795[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m20[0m - [1mRecall: 0.7034725374489332[0m
[32m2024-09-24 02:41:09.811[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m21[0m - [1mPrecision: 0.8562154696132597[0m
[32m2024-09-24 02:41:09.827[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m22[0m - [1mF1-Score: 0.7723648143533516[0m
2024/09/24 02:41:10 INFO mlflow.tracking._tracking_service.client: 🏃 View run LGBM at: https://eastus.api.azureml.ms/mlflow/v2.0/subscriptions/0f61d6bf-ab3d-4df7-a666-edaf42eff57c/resourceGroups/aml-course-dp100-2024/providers/Microsoft.MachineLearningServices/workspaces/dsrp-aml-dp100/#/experiments/198d9690-6de9-4f7f-bde2-99cc70cb1836/runs/ebc45cb1-5c14-4a43-a5f7-1048cf4be90b.
2024/09/24 02:41:10 INFO mlflow.tracking._tracking_servi

#