# Clase 9: Modelamiento
## DP-100 Azure Data Scientist / DSRP
## Clase 23 Septiembre

### Dataset Utilizado: Hotel Booking

https://www.kaggle.com/code/touba7/hotel-booking

In [56]:
import pandas as pd
import mlflow


from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV, cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.dummy import DummyClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

from sklearn.metrics import (
    accuracy_score, 
    precision_score, 
    recall_score, 
    f1_score, 
    classification_report, 
    roc_auc_score, 
    confusion_matrix
)

from xgboost import XGBClassifier
from lightgbm import LGBMClassifier

from hyperopt import fmin, hp, tpe, Trials
from loguru import logger


# Azure credentials: build the workspace client from the local config file
ml_client = MLClient.from_config(
    credential=DefaultAzureCredential(),
)
# Data asset metadata (name and version of the registered dataset)
data_asset = ml_client.data.get(name="gold-booking-dsrp", version="2")


Found the config file in: /config.json


In [2]:
# Load the CSV that the data asset's path points to into a DataFrame
modeling_dataframe = pd.read_csv(data_asset.path)

## Preparación de los datos

In [3]:
# Name of the label column to predict
TARGET_COLUMN = "is_canceled"
modeling_dataframe = pd.read_csv(data_asset.path)

# Separate the label from the feature matrix
y = modeling_dataframe[TARGET_COLUMN]
X = modeling_dataframe.drop(columns=[TARGET_COLUMN])

# Deterministic 80/20 hold-out split (fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1000,
)

In [4]:
# Display the training feature matrix
X_train

Unnamed: 0,lead_time,arrival_date_week_number,arrival_date_day_of_month,stays_in_weekend_nights,stays_in_week_nights,adults,children,babies,is_repeated_guest,previous_cancellations,...,reserved_room_type_H,reserved_room_type_L,reserved_room_type_P,deposit_type_No Deposit,deposit_type_Non Refund,deposit_type_Refundable,customer_type_Contract,customer_type_Group,customer_type_Transient,customer_type_Transient-Party
89468,189.0,21.0,19.0,0.0,3.0,2.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
54838,386.0,31.0,28.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
62385,552.0,2.0,12.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
101395,18.0,46.0,7.0,1.0,3.0,2.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
103342,87.0,51.0,17.0,2.0,1.0,2.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
113915,51.0,23.0,9.0,2.0,3.0,2.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
3776,5.0,2.0,4.0,1.0,3.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
71751,80.0,28.0,11.0,0.0,4.0,3.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
70231,139.0,24.0,12.0,1.0,1.0,2.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0


In [5]:
# Display the test feature matrix
X_test

Unnamed: 0,lead_time,arrival_date_week_number,arrival_date_day_of_month,stays_in_weekend_nights,stays_in_week_nights,adults,children,babies,is_repeated_guest,previous_cancellations,...,reserved_room_type_H,reserved_room_type_L,reserved_room_type_P,deposit_type_No Deposit,deposit_type_Non Refund,deposit_type_Refundable,customer_type_Contract,customer_type_Group,customer_type_Transient,customer_type_Transient-Party
110695,221.0,17.0,28.0,1.0,2.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
74453,304.0,34.0,17.0,1.0,1.0,2.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0
94193,301.0,31.0,30.0,1.0,1.0,2.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
1387,30.0,35.0,28.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
7654,29.0,34.0,18.0,2.0,5.0,2.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
101240,1.0,45.0,5.0,2.0,2.0,2.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
54272,49.0,29.0,11.0,1.0,0.0,2.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
27544,70.0,35.0,26.0,2.0,5.0,2.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
13809,7.0,38.0,13.0,0.0,2.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0


# Modelamiento

In [7]:
import mlflow
from azureml.core import Workspace

EXPERIMENT_NAME = "DSRP - Booking Cancellation Prediction 2024"

# Point MLflow at the tracking server of the Azure ML workspace described by
# the local config file.
ws = Workspace.from_config()
mlflow.set_tracking_uri(ws.get_mlflow_tracking_uri())

# set_experiment() creates the experiment when it does not exist yet and
# reuses it otherwise. A separate create_experiment() call is not needed and
# would raise when this cell is re-run, because the name is already taken.
mlflow.set_experiment(EXPERIMENT_NAME)

# Log parameters, metrics and models automatically for supported libraries.
mlflow.autolog()

2024/09/26 00:54:27 INFO mlflow.tracking.fluent: Autologging successfully enabled for lightgbm.
2024/09/26 00:54:27 INFO mlflow.tracking.fluent: Autologging successfully enabled for xgboost.
2024/09/26 00:54:29 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.


In [58]:
from typing import Any

class MachineLearningProcessor:
    """Train, evaluate and tune one classifier on a single tabular dataset.

    The estimator class is wrapped in a two-step pipeline (standard scaling
    followed by the classifier). Every method works on the same
    deterministic 80/20 train/test split of ``data``.
    """

    def __init__(
        self,
        data: pd.DataFrame,
        algorithm: Any,
        model_name: str,
        target: str,
        params: dict = None,
    ):
        """Store the dataset, the estimator class and its default parameters.

        Args:
            data: Full modeling table, including the target column.
            algorithm: Estimator class (not an instance); it is called with
                the hyperparameters as keyword arguments.
            model_name: Label used as the MLflow run name and in log lines.
            target: Name of the target column inside ``data``.
            params: Default hyperparameters used by ``train`` when it is
                called without explicit ones.
        """
        self.data = data
        self.algorithm = algorithm
        self.model_name = model_name
        self.target_column = target
        # Keep a private copy so later changes to the caller's dict do not
        # leak into this processor. This argument used to be discarded.
        self.params = dict(params) if params else {}

    def _split_data(self):
        """Return ``(X_train, X_test, y_train, y_test)`` for the stored data.

        The split uses a fixed ``test_size`` of 0.2 and a fixed
        ``random_state``, so every call yields the same partition.
        """
        features = self.data.drop(self.target_column, axis=1)
        labels = self.data[self.target_column]
        return train_test_split(
            features, labels, test_size=0.2, random_state=1000
        )

    def __make_pipeline(self, params: dict = None) -> "Pipeline":
        """Build the scaling + classifier pipeline.

        Args:
            params: Keyword arguments for the estimator class. When empty or
                ``None`` the estimator is created with its own defaults.
        """
        algorithm = self.algorithm(**params) if params else self.algorithm()

        return Pipeline(
            steps=[
                ("std_scaling", StandardScaler()),
                ("classifier", algorithm),
            ]
        )

    def optimize_grid_search(self, search_space: dict):
        """Run an exhaustive 3-fold grid search on the training split.

        Args:
            search_space: Maps estimator parameter names to the list of
                values to try. Names are given without the pipeline step
                prefix; it is added here.

        Returns:
            The ``cv_results_`` attribute of the fitted ``GridSearchCV``.
        """
        X_train, _, y_train, _ = self._split_data()
        _pipeline = self.__make_pipeline()

        optimizer = GridSearchCV(
            _pipeline,
            # Route every parameter to the "classifier" step of the pipeline.
            param_grid={
                f"classifier__{param}": space
                for param, space in search_space.items()
            },
            cv=3,
        )
        optimizer.fit(X_train, y_train)
        return optimizer.cv_results_

    def optimize_random_search(self, search_space: dict):
        """Run a 5-iteration, 3-fold randomized search on the training split.

        Args:
            search_space: Maps estimator parameter names to the values or
                distributions to sample from. Names are given without the
                pipeline step prefix; it is added here.

        Returns:
            The ``best_params_`` attribute of the fitted
            ``RandomizedSearchCV``. Its keys carry the ``classifier__``
            prefix used in the search.
        """
        X_train, _, y_train, _ = self._split_data()
        _pipeline = self.__make_pipeline()

        optimizer = RandomizedSearchCV(
            _pipeline,
            param_distributions={
                f"classifier__{param}": space
                for param, space in search_space.items()
            },
            random_state=100,
            n_iter=5,
            cv=3,
        )
        optimizer.fit(X_train, y_train)
        return optimizer.best_params_

    def optimize_tpe(self, search_space: dict, max_evals: int = 10):
        """Search hyperparameters with hyperopt's TPE algorithm.

        Args:
            search_space: hyperopt search space; each sampled point is
                passed to the estimator class as keyword arguments.
            max_evals: Number of points to evaluate.

        Returns:
            The value returned by hyperopt's ``fmin`` for the best point.
        """
        # The split is deterministic, so compute it once instead of once per
        # evaluation.
        X_train, X_test, y_train, y_test = self._split_data()

        def objective(params):
            """Fit one candidate and return its negated test accuracy."""
            _pipeline = self.__make_pipeline(params=params)
            _pipeline.fit(X_train, y_train)
            predictions = _pipeline.predict(X_test)

            # fmin minimizes, so accuracy is negated.
            # NOTE(review): candidates are scored on the same test split that
            # train() reports on, so test metrics of tuned models are
            # optimistic; a separate validation split would avoid that.
            return -accuracy_score(y_test, predictions)

        # Hand the Trials object to fmin so the evaluation history is
        # recorded in it (it used to be created and never used).
        trials = Trials()
        best = fmin(
            fn=objective,
            space=search_space,
            max_evals=max_evals,
            algo=tpe.suggest,
            trials=trials,
        )
        return best

    def train(self, params: dict = None):
        """Fit the pipeline inside an MLflow run and report test metrics.

        Accuracy, recall, precision and F1 on the test split are written to
        the log; the confusion matrix and the classification report are
        printed.

        Args:
            params: Hyperparameters for the estimator. When omitted (or
                ``None``) the ones given to the constructor are used, and
                the estimator defaults if there were none.
        """
        effective_params = self.params if params is None else params

        mlflow.autolog()
        with mlflow.start_run(run_name=self.model_name):

            X_train, X_test, y_train, y_test = self._split_data()

            _pipeline = self.__make_pipeline(params=effective_params)

            _pipeline.fit(X_train, y_train)
            predictions = _pipeline.predict(X_test)

            metrics = {
                "accuracy_score": accuracy_score(y_test, predictions),
                "recall_score": recall_score(y_test, predictions),
                "precision_score": precision_score(y_test, predictions),
                "f1_score": f1_score(y_test, predictions),
            }

            logger.info(f" {self.model_name} Accuracy: {metrics['accuracy_score']}")
            logger.info(f" {self.model_name} Recall: {metrics['recall_score']}")
            logger.info(f" {self.model_name} Precision: {metrics['precision_score']}")
            logger.info(f" {self.model_name} F1-Score: {metrics['f1_score']}")

            print(confusion_matrix(y_test, predictions))
            print(
                classification_report(y_test, predictions)
            )

# Baseline

In [16]:
# Baseline: predictions drawn uniformly at random
dummy_params = {
    "strategy": "uniform"
}
dummy_ml_processor = MachineLearningProcessor(
    data=modeling_dataframe,
    algorithm=DummyClassifier,
    model_name="Dummy Classifier with MachineLearningProcessor",
    target=TARGET_COLUMN,
    params=dummy_params,
)
# train() takes the hyperparameters itself; pass them explicitly so the
# requested strategy is the one that gets trained.
dummy_ml_processor.train(params=dummy_params)

2024/09/26 01:18:24 INFO mlflow.tracking.fluent: Autologging successfully enabled for lightgbm.
2024/09/26 01:18:24 INFO mlflow.tracking.fluent: Autologging successfully enabled for xgboost.
2024/09/26 01:18:25 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.
[32m2024-09-26 01:18:45.374[0m | [1mINFO    [0m | [36m__main__[0m:[36mtrain[0m:[36m52[0m - [1m Dummy Classifier with MachineLearningProcessor Accuracy: 0.4995812044559846[0m
[32m2024-09-26 01:18:45.376[0m | [1mINFO    [0m | [36m__main__[0m:[36mtrain[0m:[36m53[0m - [1m Dummy Classifier with MachineLearningProcessor Recall: 0.494098955969133[0m
[32m2024-09-26 01:18:45.378[0m | [1mINFO    [0m | [36m__main__[0m:[36mtrain[0m:[36m54[0m - [1m Dummy Classifier with MachineLearningProcessor Precision: 0.36758125791473195[0m
[32m2024-09-26 01:18:45.379[0m | [1mINFO    [0m | [36m__main__[0m:[36mtrain[0m:[36m55[0m - [1m Dummy Classifier with MachineLearningProcessor F1-S

[[7575 7491]
 [4458 4354]]
              precision    recall  f1-score   support

         0.0       0.63      0.50      0.56     15066
         1.0       0.37      0.49      0.42      8812

    accuracy                           0.50     23878
   macro avg       0.50      0.50      0.49     23878
weighted avg       0.53      0.50      0.51     23878



# Regresión Logística

In [17]:
logreg_ml_processor = MachineLearningProcessor(
    data=modeling_dataframe,
    algorithm=LogisticRegression,
    model_name="Logistic Regression Classifier with MachineLearningProcessor",
    target=TARGET_COLUMN,
)
# No hyperparameters: train with the estimator defaults. The argument is
# passed explicitly because train() declares it.
logreg_ml_processor.train(params=None)

2024/09/26 01:20:42 INFO mlflow.tracking.fluent: Autologging successfully enabled for lightgbm.
2024/09/26 01:20:42 INFO mlflow.tracking.fluent: Autologging successfully enabled for xgboost.
2024/09/26 01:20:42 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.
[32m2024-09-26 01:21:03.632[0m | [1mINFO    [0m | [36m__main__[0m:[36mtrain[0m:[36m52[0m - [1m Logistic REgression Classifier with MachineLearningProcessor Accuracy: 0.815813719742022[0m
[32m2024-09-26 01:21:03.633[0m | [1mINFO    [0m | [36m__main__[0m:[36mtrain[0m:[36m53[0m - [1m Logistic REgression Classifier with MachineLearningProcessor Recall: 0.6295960054471176[0m
[32m2024-09-26 01:21:03.634[0m | [1mINFO    [0m | [36m__main__[0m:[36mtrain[0m:[36m54[0m - [1m Logistic REgression Classifier with MachineLearningProcessor Precision: 0.8302903322358576[0m
[32m2024-09-26 01:21:03.635[0m | [1mINFO    [0m | [36m__main__[0m:[36mtrain[0m:[36m55[0m - [1m Logistic R

[[13932  1134]
 [ 3264  5548]]
              precision    recall  f1-score   support

         0.0       0.81      0.92      0.86     15066
         1.0       0.83      0.63      0.72      8812

    accuracy                           0.82     23878
   macro avg       0.82      0.78      0.79     23878
weighted avg       0.82      0.82      0.81     23878



## Vecino más cercano - KNN

In [None]:
knn_params = {
    "n_neighbors": 5
}
knn_ml_processor = MachineLearningProcessor(
    data=modeling_dataframe,
    algorithm=KNeighborsClassifier,
    model_name="KNN Classifier with MachineLearningProcessor",
    target=TARGET_COLUMN,
    params=knn_params,
)
# train() takes the hyperparameters itself; pass them explicitly so the
# configured number of neighbors is the one that gets trained.
knn_ml_processor.train(params=knn_params)

# Random Forest

In [None]:
rf_params = {
    "n_estimators": 300,
    "max_depth": 3
}
rf_ml_processor = MachineLearningProcessor(
    data=modeling_dataframe,
    algorithm=RandomForestClassifier,
    model_name="RF Classifier with MachineLearningProcessor",
    target=TARGET_COLUMN,
    params=rf_params,
)
# train() takes the hyperparameters itself; pass them explicitly so the
# configured forest size and depth are the ones that get trained.
rf_ml_processor.train(params=rf_params)

# Gradient Boosting Machine

In [18]:
gbm_ml_processor = MachineLearningProcessor(
    data=modeling_dataframe,
    algorithm=GradientBoostingClassifier,
    model_name="GBM Classifier with MachineLearningProcessor",
    target=TARGET_COLUMN,
)
# No hyperparameters: train with the estimator defaults. The argument is
# passed explicitly because train() declares it.
gbm_ml_processor.train(params=None)

2024/09/26 01:23:25 INFO mlflow.tracking.fluent: Autologging successfully enabled for lightgbm.
2024/09/26 01:23:25 INFO mlflow.tracking.fluent: Autologging successfully enabled for xgboost.
2024/09/26 01:23:25 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.
[32m2024-09-26 01:24:12.900[0m | [1mINFO    [0m | [36m__main__[0m:[36mtrain[0m:[36m52[0m - [1m GBM Classifier with MachineLearningProcessor Accuracy: 0.8215093391406315[0m
[32m2024-09-26 01:24:12.901[0m | [1mINFO    [0m | [36m__main__[0m:[36mtrain[0m:[36m53[0m - [1m GBM Classifier with MachineLearningProcessor Recall: 0.6235814798002723[0m
[32m2024-09-26 01:24:12.902[0m | [1mINFO    [0m | [36m__main__[0m:[36mtrain[0m:[36m54[0m - [1m GBM Classifier with MachineLearningProcessor Precision: 0.8532608695652174[0m
[32m2024-09-26 01:24:12.903[0m | [1mINFO    [0m | [36m__main__[0m:[36mtrain[0m:[36m55[0m - [1m GBM Classifier with MachineLearningProcessor F1-Score: 0.

[[14121   945]
 [ 3317  5495]]
              precision    recall  f1-score   support

         0.0       0.81      0.94      0.87     15066
         1.0       0.85      0.62      0.72      8812

    accuracy                           0.82     23878
   macro avg       0.83      0.78      0.79     23878
weighted avg       0.83      0.82      0.81     23878



# XGBoost

Guía de hiperparámetros de XGBoost: https://www.kaggle.com/code/prashant111/a-guide-on-xgboost-hyperparameters-tuning

In [63]:
# Processor wrapping the XGBoost classifier
xgboost_ml_processor = MachineLearningProcessor(
    target=TARGET_COLUMN,
    model_name="XGBoost Classifier with MachineLearningProcessor",
    algorithm=XGBClassifier,
    data=modeling_dataframe,
)
# Tune the tree depth with the TPE search
xgboost_ml_processor.optimize_tpe(
    search_space=dict(max_depth=hp.randint("max_depth", 1, 10)),
)

100%|██████████| 10/10 [04:35<00:00, 27.52s/trial, best loss: -0.8558924533042969]


2024/09/26 02:36:53 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '073cf58b-cc80-4874-8109-80b1bbc2d5cb', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow

2024/09/26 02:37:19 INFO mlflow.tracking._tracking_service.client: 🏃 View run mighty_kiwi_h9mrqk5g at: https://eastus.api.azureml.ms/mlflow/v2.0/subscriptions/0f61d6bf-ab3d-4df7-a666-edaf42eff57c/resourceGroups/aml-course-dp100-2024/providers/Microsoft.MachineLearningServices/workspaces/dsrp-aml-dp100/#/experiments/198d9690-6de9-4f7f-bde2-99cc70cb1836/runs/073cf58b-cc80-4874-8109-80b1bbc2d5cb.

2024/09/26 02:37:19 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://eastus.api.azureml.ms/mlflow/v2.0/subscriptions/0f61d6bf-ab3d-4df7-a666-edaf42eff57c/resourceGroups/aml-course-dp100-2024/providers/Microsoft.MachineLearningServices/workspaces/dsrp-aml-dp100/#/experiments/198d9690-6de9-4f7f-bde2-99cc70cb1

{'max_depth': 7}

In [64]:
# Train XGBoost with a tree depth of 7
xgboost_ml_processor.train(params=dict(max_depth=7))

2024/09/26 02:41:40 INFO mlflow.tracking.fluent: Autologging successfully enabled for lightgbm.
2024/09/26 02:41:40 INFO mlflow.tracking.fluent: Autologging successfully enabled for xgboost.
2024/09/26 02:41:41 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.
[32m2024-09-26 02:42:09.767[0m | [1mINFO    [0m | [36m__main__[0m:[36mtrain[0m:[36m121[0m - [1m XGBoost Classifier with MachineLearningProcessor Accuracy: 0.8558924533042969[0m
[32m2024-09-26 02:42:09.768[0m | [1mINFO    [0m | [36m__main__[0m:[36mtrain[0m:[36m122[0m - [1m XGBoost Classifier with MachineLearningProcessor Recall: 0.738197911938266[0m
[32m2024-09-26 02:42:09.769[0m | [1mINFO    [0m | [36m__main__[0m:[36mtrain[0m:[36m123[0m - [1m XGBoost Classifier with MachineLearningProcessor Precision: 0.851551250163634[0m
[32m2024-09-26 02:42:09.772[0m | [1mINFO    [0m | [36m__main__[0m:[36mtrain[0m:[36m124[0m - [1m XGBoost Classifier with MachineLearningProc

[[13932  1134]
 [ 2307  6505]]
              precision    recall  f1-score   support

         0.0       0.86      0.92      0.89     15066
         1.0       0.85      0.74      0.79      8812

    accuracy                           0.86     23878
   macro avg       0.85      0.83      0.84     23878
weighted avg       0.86      0.86      0.85     23878



## LightGBM

In [59]:
# Processor wrapping the LightGBM classifier
lgbm_ml_processor = MachineLearningProcessor(
    target=TARGET_COLUMN,
    model_name="LGBM Classifier with MachineLearningProcessor OPTIMIZED",
    algorithm=LGBMClassifier,
    data=modeling_dataframe,
)


In [None]:
# Hyperparameter optimization: random search
lgbm_ml_processor.optimize_random_search(
    search_space=dict(
        max_depth=list(range(100)),
        min_data_in_leaf=[100, 500, 200],
    )
)

In [60]:
# Hyperparameter optimization: hyperopt (TPE)
lgbm_ml_processor.optimize_tpe(
    search_space=dict(max_depth=hp.randint("max_depth", 1, 10)),
)

[LightGBM] [Info] Number of positive: 35412, number of negative: 60100
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.068179 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 996                      
[LightGBM] [Info] Number of data points in the train set: 95512, number of used features: 64
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.370760 -> initscore=-0.528959
[LightGBM] [Info] Start training from score -0.528959 
[LightGBM] [Info] Number of positive: 35412, number of negative: 60100            
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.056285 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 996                                                  
[LightGBM] [Info] Number of 

2024/09/26 02:29:52 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID 'e84ef5fc-ac20-4632-b2eb-72605287cd38', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow

2024/09/26 02:30:16 INFO mlflow.tracking._tracking_service.client: 🏃 View run cyan_feast_cd0dwdqc at: https://eastus.api.azureml.ms/mlflow/v2.0/subscriptions/0f61d6bf-ab3d-4df7-a666-edaf42eff57c/resourceGroups/aml-course-dp100-2024/providers/Microsoft.MachineLearningServices/workspaces/dsrp-aml-dp100/#/experiments/198d9690-6de9-4f7f-bde2-99cc70cb1836/runs/e84ef5fc-ac20-4632-b2eb-72605287cd38.

2024/09/26 02:30:16 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://eastus.api.azureml.ms/mlflow/v2.0/subscriptions/0f61d6bf-ab3d-4df7-a666-edaf42eff57c/resourceGroups/aml-course-dp100-2024/providers/Microsoft.MachineLearningServices/workspaces/dsrp-aml-dp100/#/experiments/198d9690-6de9-4f7f-bde2-99cc70cb18

{'max_depth': 9}

In [61]:
# Train LightGBM with a tree depth of 9
lgbm_ml_processor.train(params=dict(max_depth=9))

2024/09/26 02:34:55 INFO mlflow.tracking.fluent: Autologging successfully enabled for lightgbm.
2024/09/26 02:34:55 INFO mlflow.tracking.fluent: Autologging successfully enabled for xgboost.
2024/09/26 02:34:56 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.
[32m2024-09-26 02:35:25.975[0m | [1mINFO    [0m | [36m__main__[0m:[36mtrain[0m:[36m121[0m - [1m LGBM Classifier with MachineLearningProcessor OPTIMIZED Accuracy: 0.8424072367870006[0m
[32m2024-09-26 02:35:25.977[0m | [1mINFO    [0m | [36m__main__[0m:[36mtrain[0m:[36m122[0m - [1m LGBM Classifier with MachineLearningProcessor OPTIMIZED Recall: 0.6929187471629596[0m
[32m2024-09-26 02:35:25.978[0m | [1mINFO    [0m | [36m__main__[0m:[36mtrain[0m:[36m123[0m - [1m LGBM Classifier with MachineLearningProcessor OPTIMIZED Precision: 0.8524361301130811[0m
[32m2024-09-26 02:35:25.980[0m | [1mINFO    [0m | [36m__main__[0m:[36mtrain[0m:[36m124[0m - [1m LGBM Classifier with

[LightGBM] [Info] Number of positive: 35412, number of negative: 60100
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.056439 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 996
[LightGBM] [Info] Number of data points in the train set: 95512, number of used features: 64
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.370760 -> initscore=-0.528959
[LightGBM] [Info] Start training from score -0.528959
[[14009  1057]
 [ 2706  6106]]
              precision    recall  f1-score   support

         0.0       0.84      0.93      0.88     15066
         1.0       0.85      0.69      0.76      8812

    accuracy                           0.84     23878
   macro avg       0.85      0.81      0.82     23878
weighted avg       0.84      0.84      0.84     23878



#