## Importando librerías:

In [10]:
%%writefile train_gbt.py

#Librerías
import pandas as pd
import numpy as np
import mlflow
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.metrics import  classification_report, accuracy_score, roc_auc_score
import os
from utils import Utils
utils = Utils()

def run():
    #
    # Entrena un modelo sklearn Gradient Boosting Trees...
    #

    import sys
    # Cargando datos...
    data=utils.load_data(path='processed_data_.csv')
    data = data.dropna()

    # Partiendo variable dependientes e independientes...
    X,y = utils.features_target(data, ['is_canceled', 'lead_time', 'arrival_date_week_number',
    'stays_in_weekend_nights', 'stays_in_week_nights', 'adults', 'children',
    'babies', 
    'previous_bookings_not_canceled','year_res_status_date',
    'month_res_status_date', 'day_res_status_date', 'hotel_City Hotel',
    'hotel_Resort Hotel', 'meal_BB', 'meal_FB', 'meal_HB', 'meal_SC',
    'meal_Undefined', 'market_segment_Aviation',
    'market_segment_Complementary', 'market_segment_Corporate',
    'market_segment_Direct', 'market_segment_Groups',
    'market_segment_Offline TA/TO', 'market_segment_Online TA',
    'market_segment_Undefined', 'distribution_channel_Corporate',
    'distribution_channel_Direct', 'distribution_channel_GDS',
    'distribution_channel_TA/TO', 'distribution_channel_Undefined',
    'reserved_room_type_A', 'reserved_room_type_B', 'reserved_room_type_C',
    'reserved_room_type_D', 'reserved_room_type_E', 'reserved_room_type_F',
    'reserved_room_type_G', 'reserved_room_type_H', 'reserved_room_type_L',
    'reserved_room_type_P', 'deposit_type_No Deposit',
    'deposit_type_Non Refund', 'deposit_type_Refundable',
    'customer_type_Contract', 'customer_type_Group',
    'customer_type_Transient', 'customer_type_Transient-Party',
    'reservation_status_processed'], ["is_canceled"])
    
    # Particionamiento entrenamiento y validación...
    X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.20,random_state=42)

    # Definiendo hiperparámetros...
    learningrate = float(sys.argv[1])
    nestimators = float(sys.argv[2])
    maxdepht = int(sys.argv[3])
    verbose = int(sys.argv[4])


    print('Tracking directory:', mlflow.get_tracking_uri())

    with mlflow.start_run():

        estimator = GradientBoostingClassifier(learning_rate=learningrate, n_estimators=nestimators, max_depth=maxdepht)
        estimator.fit(X_train, y_train)
        accuracy, recall, roc_score = utils.eval_metrics(y_test, y_pred=estimator.predict(X_test))
        if verbose > 0:
            utils.report(estimator, accuracy, recall, roc_score)

            mlflow.log_param("Learning_Rate", learningrate)
            mlflow.log_param("n_estimators", nestimators)
            mlflow.log_param("Max_Depth", maxdepht)

            mlflow.log_metric("Accuracy", accuracy)
            mlflow.log_metric("Recall", recall)
            mlflow.log_metric("Roc_Score", roc_score)

            mlflow.sklearn.log_model(estimator, "model")


if __name__ == "__main__":
    run()

Writing train_gbt.py


In [11]:
%%writefile MLproject
name: Proyecto Bookings

entry_points:
    main:
        parameters:
            learning_rate: {type: float, default: 0.001}
            n_estimators: {type: int, default: 100}
            max_depth: {type: int, default: 5}
            verbose: {type: int, default: 1}
        command: 'python3 train_gbt.py {learning_rate} {n_estimators} {max_depth} {verbose}'

Writing MLproject


In [13]:
#
# Ejecución con parámetros por defecto
#
!mlflow run --env-manager=v /MlFlow/MLproject

Traceback (most recent call last):
  File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.10_3.10.2800.0_x64__qbz5n2kfra8p0\lib\runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.10_3.10.2800.0_x64__qbz5n2kfra8p0\lib\runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "C:\Users\jdbul\OneDrive\Escritorio\Especializacion-Analitica\Asignaturas\ProductosDatos\DataProducts\Producto_datos\venv\Scripts\mlflow.exe\__main__.py", line 7, in <module>
  File "C:\Users\jdbul\OneDrive\Escritorio\Especializacion-Analitica\Asignaturas\ProductosDatos\DataProducts\Producto_datos\venv\lib\site-packages\click\core.py", line 1130, in __call__
    return self.main(*args, **kwargs)
  File "C:\Users\jdbul\OneDrive\Escritorio\Especializacion-Analitica\Asignaturas\ProductosDatos\DataProducts\Producto_datos\venv\lib\site-packages\click\core.py", line 1055, in main
    rv 