In [1]:
from pipeline import Pipeline
from pipeline.steps import *

xgb_params = {
    "colsample_bylevel": 0.4778015829774066,
    "colsample_bynode": 0.362764358742407,
    "colsample_bytree": 0.7107423488010493,
    "gamma": 1.7094857725240398,
    "learning_rate": 0.02213323588455387,
    "max_depth": 20,
    "max_leaves": 512,
    "min_child_weight": 16,
    "n_estimators": 1667,
    "n_jobs": -1,
    "random_state": 42,
    "reg_alpha": 39.352415706891264,
    "reg_lambda": 75.44843704068275,
    "subsample": 0.06566669853471274,
    "verbose": 0,
}

class FeatureEngineeringProductCatInteractionStep(PipelineStep):

    def __init__(self, cat="cat1", name: Optional[str] = None, tn="tn"):
        super().__init__(name)
        self.cat = cat
        self.tn = tn


    def execute(self, df, df_original=None) -> None:
        # agrupo el dataframe por cat1 (sumando), obteniendo fecha, cat1 y
        # luego paso el dataframe a wide format, donde cada columna es una categoria  y la fila es la suma de tn para cada cat1
        # luego mergeo al dataframe original por fecha y product_id
        df_index = df.index
        if df_original is None:
            df_to_proces = df
        else:
            df_to_proces = df_original
        df_cat = df_to_proces.groupby(["date_id", self.cat]).agg({self.tn: "sum"}).reset_index()
        print(df_cat)
        df_cat = df_cat.pivot(index="date_id", columns=self.cat, values=self.tn).reset_index()
        # paso a string los nombres de las columnas
        df_cat.columns = [f"{self.tn}_{self.cat}_{col}" if col != "date_id" else "date_id" for col in df_cat.columns]
        df = df.merge(df_cat, on="date_id", how="left")
        # vuelvo a setear el indice original
        df.index = df_index
        return {"df": df}
    

import numpy as np
class Log1pTranformation(PipelineStep):
    def execute(self, df):
        df["tn"] = df["tn"].apply(lambda x: np.log1p(x) if x >= 0 else 0)
        return {"df": df}
    
class InverseLog1pTranformation(PipelineStep):
    def execute(self, df, predictions, y_test):
        df["target"] = df["target"].apply(lambda x: np.expm1(x) if x >= 0 else 0)
        predictions = predictions.apply(lambda x: np.expm1(x) if x >= 0 else 0)
        y_test["target"] = y_test["target"].apply(lambda x: np.expm1(x) if x >= 0 else 0)
        return {"df": df, "predictions": predictions, "y_test": y_test}

class GroupByProductStep(PipelineStep):
    def execute(self, df) -> None:
        # Agrupo el dataframe por product_id y fecha, sumando las cantidades
        df = df.groupby(["product_id", "fecha"]).agg({
            'cust_request_qty': 'sum',
            'cust_request_tn': 'sum',
            'tn': 'sum',
            'stock_final': 'max',
            'cat1': 'first',
            'cat2': 'first',
            'cat3': 'first',
            'brand': 'first',
            'sku_size': 'max',
        }).reset_index()
        # le dejo la columna customer_id = 0 para que no crashee el resto
        df['customer_id'] = 1
        return {"df": df}

class FilterDatasetByColumn(PipelineStep):
    def __init__(self, column: str, value, name: Optional[str] = None):
        super().__init__(name)
        self.column = column
        self.value = value
        
    def execute(self, df) -> None:
        # Filtra el DataFrame por el valor de la columna especificada
        df_original = df.copy()
        df_filtered = df[df[self.column] == self.value]
        print(df_filtered.shape)
        return {"df": df_filtered, "df_original": df_original}

class DeleteBadColumns(PipelineStep):
    def execute(self, df) -> None:
        # Elimina  las columnas donde toda sus filas son NaN
        base_columns = df.columns
        df = df.dropna(axis=1, how='all')
        df = df.loc[:, (df != 0).any(axis=0)]
        deleted_columns = set(base_columns) - set(df.columns)
        print(f"Deleted columns: {deleted_columns}")
        return {"df": df}

class CreateResidualTargetStep(PipelineStep):
    def __init__(self, name: Optional[str] = None, target_col: str = 'tn', window: int = 12):
        super().__init__(name)
        self.target_col = target_col
        self.window = window

    def execute(self, df: pd.DataFrame) -> Dict:
        df = df.sort_values(['product_id', 'customer_id', 'fecha']).copy()

        # Valor futuro (shift -2) que será la predicción final
        df['target_shifted'] = df.groupby(['product_id', 'customer_id'])[self.target_col].shift(-2)

        # Cálculo del valor base usando la media rolling pasada
        df['base_prediction'] = (
            df.groupby(['product_id', 'customer_id'])[self.target_col]
            .transform(lambda x: x.rolling(self.window, min_periods=1).mean())
        )

        # Target: residuo a predecir
        df['target'] = df['target_shifted'] - df['base_prediction']
        df.drop(columns=['target_shifted', 'base_prediction'], inplace=True, errors='ignore')

        return {
            "df": df,
            "target_col": self.target_col,
        }

class InverseResidualTargetStep(PipelineStep):
    def __init__(
        self,
        name: Optional[str] = None,
        target_col: str = 'tn',
        window: int = 12
    ):
        super().__init__(name)
        self.target_col = target_col
        self.window = window

    def execute(self, df: pd.DataFrame, predictions, y_test) -> Dict:

        # Orden correcto
        df = df.sort_values(['product_id', 'customer_id', 'fecha'])

        # Recalcular base_prediction exactamente como en CreateResidualTargetStep
        df['base_prediction'] = (
            df.groupby(['product_id', 'customer_id'])[self.target_col]
            .transform(lambda x: x.rolling(self.window, min_periods=1).mean())
        )

        # Invertir el target residual: target_original = residual + base_prediction
        df['target'] = df['target'] + df['base_prediction']

        predictions = predictions + df['base_prediction'].loc[y_test.index]

        y_test = df[["target"]].loc[y_test.index]
        # Limpiar si no querés guardar la base
        df.drop(columns=['base_prediction'], inplace=True)

        return {"df": df, "predictions": predictions, "y_test": y_test}
    



In [2]:
pipeline = Pipeline(
    steps=[
        LoadDataFrameStep("df_intermedio.parquet"),
    ]
)
pipeline.run()

Executing step: LoadDataFrameStep
Step LoadDataFrameStep completed in 2.27 seconds


In [3]:
df = pipeline.get_artifact("df")
# get unique brands
brands = df["brand"].unique()
del df
import gc
gc.collect()
brands

['ARIEL', 'LIMPIEX', 'NATURA', 'DOWNY', 'ROPEX1', ..., 'INDUSTRIAL', NaN, 'TWININGS', 'SKIN1', 'VICHY']
Length: 38
Categories (37, object): ['ARIEL', 'AYUDIN', 'CAPILATIS', 'COLBERT', ..., 'SKIN1', 'TWININGS', 'VICHY', 'VIVERE']

In [4]:
pipeline_results = {}
for brand in brands:
    print(f"Processing brand: {brand}")
    pipeline = Pipeline(
        steps=[
            LoadDataFrameStep("df_intermedio.parquet"),
            #GroupByProductStep(),
            DateRelatedFeaturesStep(),

            FilterDatasetByColumn(column="brand", value=brand),
            #Log1pTranformation(),
            #CreateTargetColumDiffStep(target_col="tn"),
            #CreateTargetColumStep(target_col="tn"),
            CreateResidualTargetStep(target_col="tn", window=12),
            ReduceMemoryUsageStep(),

            #ReduceMemoryUsageStep(),
            FeatureEngineeringLagStep(lags=[1,2,3,5,11,23], columns=["tn", "cust_request_qty", "stock_final"]),
            RollingMeanFeatureStep(window=3, columns=["tn", "cust_request_qty", "stock_final"]),
            RollingMaxFeatureStep(window=3, columns=["tn", "cust_request_qty", "stock_final"]),
            RollingMinFeatureStep(window=3, columns=["tn", "cust_request_qty", "stock_final"]),
            RollingMeanFeatureStep(window=9, columns=["tn", "cust_request_qty", "stock_final"]),
            RollingMaxFeatureStep(window=9, columns=["tn", "cust_request_qty", "stock_final"]),
            RollingMinFeatureStep(window=9, columns=["tn", "cust_request_qty", "stock_final"]),

            ReduceMemoryUsageStep(),

            RollingStdFeatureStep(window=3, columns=["tn", "cust_request_qty"]),
            RollingStdFeatureStep(window=6, columns=["tn", "cust_request_qty"]),
            RollingStdFeatureStep(window=12, columns=["tn", "cust_request_qty"]), 

            RollingSkewFeatureStep(window=3, columns=["tn", "cust_request_qty"]),
            RollingSkewFeatureStep(window=6, columns=["tn", "cust_request_qty"]),
            RollingSkewFeatureStep(window=12, columns=["tn", "cust_request_qty"]),
            ReduceMemoryUsageStep(),

            RollingZscoreFeatureStep(window=3, columns=["tn", "cust_request_qty"]),
            RollingZscoreFeatureStep(window=6, columns=["tn", "cust_request_qty"]),
            RollingZscoreFeatureStep(window=12, columns=["tn", "cust_request_qty"]),
            DiffFeatureStep(periods=1, columns=["tn", "cust_request_qty", "stock_final"]),
            DiffFeatureStep(periods=2, columns=["tn", "cust_request_qty", "stock_final"]),
            DiffFeatureStep(periods=3, columns=["tn", "cust_request_qty", "stock_final"]),
            DiffFeatureStep(periods=4, columns=["tn", "cust_request_qty", "stock_final"]),
            DiffFeatureStep(periods=5, columns=["tn", "cust_request_qty", "stock_final"]),
            DiffFeatureStep(periods=11, columns=["tn", "cust_request_qty", "stock_final"]),
            FeatureEngineeringProductCatInteractionStep(cat="cat1", tn="tn"),
            FeatureEngineeringProductCatInteractionStep(cat="cat2", tn="tn"),
            FeatureEngineeringProductCatInteractionStep(cat="cat3", tn="tn"),
            #FeatureEngineeringProductCatInteractionStep(cat="product_id", tn="tn"),

            CreateTotalCategoryStep(cat="cat1"),
            CreateTotalCategoryStep(cat="cat2"),
            CreateTotalCategoryStep(cat="cat3"),
            CreateTotalCategoryStep(cat="brand"),
            CreateTotalCategoryStep(cat="customer_id"),
            CreateTotalCategoryStep(cat="product_id"),
                    
            CreateTotalCategoryStep(cat="cat1", tn="stock_final"),
            CreateTotalCategoryStep(cat="cat2", tn="stock_final"),
            CreateTotalCategoryStep(cat="cat3", tn="stock_final"),

            #ReduceMemoryUsageStep(),
            FeatureDivInteractionStep(columns=[
                    ("tn", "tn_cat1_vendidas"), 
                    ("tn", "tn_cat2_vendidas"), 
                    ("tn", "tn_cat3_vendidas"), 
                    ("tn", "tn_brand_vendidas")]
            ),
            #ReduceMemoryUsageStep(),

            FeatureProdInteractionStep(columns=[("tn", "cust_request_qty")]),
            CreateWeightByCustomerStep(),
            CreateWeightByProductStep(),
            ReduceMemoryUsageStep(),
            DeleteBadColumns(),
            FilterProductsIDStep(dfs=["df"]),
            SplitDataFrameStep2(df="df", test_date=29, gap=1),
            PrepareXYStep(),
            TrainModelStep(params={"num_leaves":31, "feature_fraction":0.2}),
            PredictStep(),
            InverseResidualTargetStep(),
            #InverseLog1pTranformation(),
            InverseScalePredictionsStep(),
            IntegratePredictionsStep(),
            EvaluatePredictionsSteps(),
            EvaluatePredictionsOptimizatedSteps(), 
            #PlotFeatureImportanceStep(),

        ],
        optimize_arftifacts_memory=True
    )

    try: 
        pipeline.run()
        
        # Save the results
        pipeline_results[brand] = {
            "eval_df": pipeline.get_artifact("eval_df"),
            "alpha_opt": pipeline.get_artifact("alpha_opt"),
        }
    except Exception as e:
        print(f"Error processing brand {brand}: {e}")
        continue

Processing brand: ARIEL
Executing step: LoadDataFrameStep
Step LoadDataFrameStep completed in 2.33 seconds
Executing step: DateRelatedFeaturesStep
Step DateRelatedFeaturesStep completed in 5.09 seconds
Executing step: FilterDatasetByColumn
(377832, 17)
Step FilterDatasetByColumn completed in 2.74 seconds
Executing step: CreateResidualTargetStep
Step CreateResidualTargetStep completed in 3.45 seconds
Executing step: ReduceMemoryUsageStep
--- Memory usage before: 30.63 MB
--- Memory usage after: 15.13 MB
--- Decreased memory usage by 50.6%

Step ReduceMemoryUsageStep completed in 0.06 seconds
Executing step: FeatureEngineeringLagStep
Step FeatureEngineeringLagStep completed in 0.16 seconds
Executing step: RollingMeanFeatureStep
Step RollingMeanFeatureStep completed in 9.91 seconds
Executing step: RollingMaxFeatureStep
Step RollingMaxFeatureStep completed in 9.55 seconds
Executing step: RollingMinFeatureStep
Step RollingMinFeatureStep completed in 9.74 seconds
Executing step: RollingMeanF

  has_large_values = (abs_vals > 1e6).any()
  has_large_values = (abs_vals > 1e6).any()


[100]	valid_0's l2: 17.1568	valid_0's total_error: 1.22799
[200]	valid_0's l2: 16.1556	valid_0's total_error: 1.11825
[300]	valid_0's l2: 15.5443	valid_0's total_error: 1.06357
[400]	valid_0's l2: 15.6537	valid_0's total_error: 1.04697
[500]	valid_0's l2: 15.6319	valid_0's total_error: 1.04724
[600]	valid_0's l2: 15.5351	valid_0's total_error: 1.02532
[700]	valid_0's l2: 15.7062	valid_0's total_error: 0.978477
[800]	valid_0's l2: 15.5221	valid_0's total_error: 0.963123
[900]	valid_0's l2: 15.482	valid_0's total_error: 1.00196
[1000]	valid_0's l2: 15.3084	valid_0's total_error: 0.987926
[1100]	valid_0's l2: 15.3274	valid_0's total_error: 0.989223
[1200]	valid_0's l2: 15.2276	valid_0's total_error: 0.994576
[1300]	valid_0's l2: 15.0462	valid_0's total_error: 0.975038
[1400]	valid_0's l2: 15.104	valid_0's total_error: 0.970402
[1500]	valid_0's l2: 15.236	valid_0's total_error: 0.990322
Step TrainModelStep completed in 30.89 seconds
Executing step: PredictStep
Step PredictStep completed in

  df['base_prediction'] = (


Step InverseResidualTargetStep completed in 1.38 seconds
Executing step: InverseScalePredictionsStep
Step InverseScalePredictionsStep completed in 0.02 seconds
Executing step: IntegratePredictionsStep
Step IntegratePredictionsStep completed in 0.02 seconds
Executing step: EvaluatePredictionsSteps
Error en test: 0.2849

Top 5 productos con mayor error absoluto:
   product_id       target  predictions      tn_real      tn_pred  \
0       20001  1261.322642  1515.850805  1261.322642  1515.850805   
1       20045   115.114431   183.234329   115.114431   183.234329   
7       20649    11.354645    75.591061    11.354645    75.591061   
9       20691    23.896790    56.474383    23.896790    56.474383   
4       20150    68.611140    88.441539    68.611140    88.441539   

   error_absoluto  
0      254.528162  
1       68.119897  
7       64.236416  
9       32.577593  
4       19.830400  
Step EvaluatePredictionsSteps completed in 0.03 seconds
Executing step: EvaluatePredictionsOptimizated

  has_large_values = (abs_vals > 1e6).any()
  has_large_values = (abs_vals > 1e6).any()


[100]	valid_0's l2: 3.78856	valid_0's total_error: 0.907035
[200]	valid_0's l2: 3.75078	valid_0's total_error: 0.913153
[300]	valid_0's l2: 3.77037	valid_0's total_error: 0.91117
[400]	valid_0's l2: 3.79293	valid_0's total_error: 0.903135
[500]	valid_0's l2: 3.81633	valid_0's total_error: 0.903273
[600]	valid_0's l2: 3.79109	valid_0's total_error: 0.893877
[700]	valid_0's l2: 3.81373	valid_0's total_error: 0.89109
[800]	valid_0's l2: 3.8415	valid_0's total_error: 0.889429
[900]	valid_0's l2: 3.80857	valid_0's total_error: 0.88079
[1000]	valid_0's l2: 3.81585	valid_0's total_error: 0.876896
[1100]	valid_0's l2: 3.80188	valid_0's total_error: 0.86887
[1200]	valid_0's l2: 3.77001	valid_0's total_error: 0.862312
[1300]	valid_0's l2: 3.78262	valid_0's total_error: 0.86417
[1400]	valid_0's l2: 3.80267	valid_0's total_error: 0.867376
[1500]	valid_0's l2: 3.78441	valid_0's total_error: 0.865198
Step TrainModelStep completed in 141.55 seconds
Executing step: PredictStep
Step PredictStep complet

  df['base_prediction'] = (


Step InverseResidualTargetStep completed in 5.72 seconds
Executing step: InverseScalePredictionsStep
Step InverseScalePredictionsStep completed in 0.07 seconds
Executing step: IntegratePredictionsStep
Step IntegratePredictionsStep completed in 0.09 seconds
Executing step: EvaluatePredictionsSteps
Error en test: 0.4792

Top 5 productos con mayor error absoluto:
   product_id      target  predictions     tn_real      tn_pred  \
0       20002  813.761120  1152.078789  813.761120  1152.078789   
2       20008  233.032150   476.626856  233.032150   476.626856   
1       20006  262.751018   481.257253  262.751018   481.257253   
3       20010  199.851811   405.018733  199.851811   405.018733   
7       20018  127.573596   299.496817  127.573596   299.496817   

   error_absoluto  
0      338.317669  
2      243.594707  
1      218.506236  
3      205.166922  
7      171.923221  
Step EvaluatePredictionsSteps completed in 0.14 seconds
Executing step: EvaluatePredictionsOptimizatedSteps
alfa o

  has_large_values = (abs_vals > 1e6).any()
  has_large_values = (abs_vals > 1e6).any()


[100]	valid_0's l2: 1.81916	valid_0's total_error: 0.860185
[200]	valid_0's l2: 1.7694	valid_0's total_error: 0.813957
[300]	valid_0's l2: 1.73532	valid_0's total_error: 0.76217
[400]	valid_0's l2: 1.72236	valid_0's total_error: 0.7249
[500]	valid_0's l2: 1.75941	valid_0's total_error: 0.718328
[600]	valid_0's l2: 1.77191	valid_0's total_error: 0.695597
[700]	valid_0's l2: 1.76746	valid_0's total_error: 0.683267
[800]	valid_0's l2: 1.77685	valid_0's total_error: 0.68315
[900]	valid_0's l2: 1.77099	valid_0's total_error: 0.678503
[1000]	valid_0's l2: 1.79585	valid_0's total_error: 0.665271
[1100]	valid_0's l2: 1.81978	valid_0's total_error: 0.660034
[1200]	valid_0's l2: 1.81451	valid_0's total_error: 0.648954
[1300]	valid_0's l2: 1.82449	valid_0's total_error: 0.639228
[1400]	valid_0's l2: 1.81833	valid_0's total_error: 0.631113
[1500]	valid_0's l2: 1.8139	valid_0's total_error: 0.627591
Step TrainModelStep completed in 59.29 seconds
Executing step: PredictStep
Step PredictStep complete

  df['base_prediction'] = (


Step InverseResidualTargetStep completed in 3.44 seconds
Executing step: InverseScalePredictionsStep
Step InverseScalePredictionsStep completed in 0.04 seconds
Executing step: IntegratePredictionsStep
Step IntegratePredictionsStep completed in 0.05 seconds
Executing step: EvaluatePredictionsSteps
Error en test: 0.1800

Top 5 productos con mayor error absoluto:
   product_id      target  predictions     tn_real     tn_pred  error_absoluto
1       20004  482.038922   593.594712  482.038922  593.594712      111.555790
9       20119  165.810220    74.266432  165.810220   74.266432       91.543789
3       20019  231.815264   306.862243  231.815264  306.862243       75.046979
0       20003  635.607411   700.496438  635.607411  700.496438       64.889026
4       20033  224.326797   178.948073  224.326797  178.948073       45.378723
Step EvaluatePredictionsSteps completed in 0.06 seconds
Executing step: EvaluatePredictionsOptimizatedSteps
alfa optimizado: 0.9310
Error en train: 0.1486
Error en

  has_large_values = (abs_vals > 1e6).any()
  has_large_values = (abs_vals > 1e6).any()


[100]	valid_0's l2: 2.52084	valid_0's total_error: 0.852506
[200]	valid_0's l2: 2.52677	valid_0's total_error: 0.809197
[300]	valid_0's l2: 2.52633	valid_0's total_error: 0.780444
[400]	valid_0's l2: 2.57723	valid_0's total_error: 0.775566
[500]	valid_0's l2: 2.67722	valid_0's total_error: 0.798835
[600]	valid_0's l2: 2.70399	valid_0's total_error: 0.801727
[700]	valid_0's l2: 2.76038	valid_0's total_error: 0.804115
[800]	valid_0's l2: 2.75666	valid_0's total_error: 0.790632
[900]	valid_0's l2: 2.83583	valid_0's total_error: 0.789424
[1000]	valid_0's l2: 2.91407	valid_0's total_error: 0.798108
[1100]	valid_0's l2: 2.9417	valid_0's total_error: 0.795517
[1200]	valid_0's l2: 2.99883	valid_0's total_error: 0.79837
[1300]	valid_0's l2: 3.01197	valid_0's total_error: 0.796055
[1400]	valid_0's l2: 3.03549	valid_0's total_error: 0.784313
[1500]	valid_0's l2: 3.05238	valid_0's total_error: 0.778366
Step TrainModelStep completed in 61.10 seconds
Executing step: PredictStep
Step PredictStep comp

  df['base_prediction'] = (


Step InverseResidualTargetStep completed in 1.65 seconds
Executing step: InverseScalePredictionsStep
Step InverseScalePredictionsStep completed in 0.03 seconds
Executing step: IntegratePredictionsStep
Step IntegratePredictionsStep completed in 0.03 seconds
Executing step: EvaluatePredictionsSteps
Error en test: 0.3013

Top 5 productos con mayor error absoluto:
   product_id      target  predictions     tn_real     tn_pred  error_absoluto
2       20014  262.116031   430.725765  262.116031  430.725765      168.609734
0       20007  307.829776   448.582901  307.829776  448.582901      140.753124
1       20012  301.510898   354.465607  301.510898  354.465607       52.954709
4       20051  128.834699   156.433212  128.834699  156.433212       27.598513
3       20049  211.369621   228.468219  211.369621  228.468219       17.098598
Step EvaluatePredictionsSteps completed in 0.04 seconds
Executing step: EvaluatePredictionsOptimizatedSteps
alfa optimizado: 0.6601
Error en train: 0.1263
Error en

  has_large_values = (abs_vals > 1e6).any()
  has_large_values = (abs_vals > 1e6).any()


[100]	valid_0's l2: 3.68949	valid_0's total_error: 1.07777
[200]	valid_0's l2: 3.80919	valid_0's total_error: 1.12535
[300]	valid_0's l2: 3.92281	valid_0's total_error: 1.15864
[400]	valid_0's l2: 3.96463	valid_0's total_error: 1.18363
[500]	valid_0's l2: 4.02173	valid_0's total_error: 1.209
[600]	valid_0's l2: 4.10405	valid_0's total_error: 1.22197
[700]	valid_0's l2: 4.12755	valid_0's total_error: 1.2228
[800]	valid_0's l2: 4.1452	valid_0's total_error: 1.21539
[900]	valid_0's l2: 4.15095	valid_0's total_error: 1.21933
[1000]	valid_0's l2: 4.11727	valid_0's total_error: 1.20576
[1100]	valid_0's l2: 4.13867	valid_0's total_error: 1.19558
[1200]	valid_0's l2: 4.18314	valid_0's total_error: 1.19633
[1300]	valid_0's l2: 4.17994	valid_0's total_error: 1.19017
[1400]	valid_0's l2: 4.19162	valid_0's total_error: 1.19011
[1500]	valid_0's l2: 4.22011	valid_0's total_error: 1.18321
Step TrainModelStep completed in 61.00 seconds
Executing step: PredictStep
Step PredictStep completed in 0.34 sec

  df['base_prediction'] = (


Step InverseResidualTargetStep completed in 1.71 seconds
Executing step: InverseScalePredictionsStep
Step InverseScalePredictionsStep completed in 0.05 seconds
Executing step: IntegratePredictionsStep
Step IntegratePredictionsStep completed in 0.03 seconds
Executing step: EvaluatePredictionsSteps
Error en test: 0.6231

Top 5 productos con mayor error absoluto:
   product_id      target  predictions     tn_real     tn_pred  error_absoluto
2       20028   61.665009   237.904734   61.665009  237.904734      176.239725
1       20015  277.524176   426.091565  277.524176  426.091565      148.567388
7       20071   53.545999   144.285547   53.545999  144.285547       90.739548
3       20038  116.561168   199.230429  116.561168  199.230429       82.669261
6       20069   66.775170   135.744099   66.775170  135.744099       68.968929
Step EvaluatePredictionsSteps completed in 0.04 seconds
Executing step: EvaluatePredictionsOptimizatedSteps
alfa optimizado: 0.5493
Error en train: 0.1967
Error en

  has_large_values = (abs_vals > 1e6).any()
  has_large_values = (abs_vals > 1e6).any()


[100]	valid_0's l2: 0.35462	valid_0's total_error: 0.955153
[200]	valid_0's l2: 0.358506	valid_0's total_error: 0.968249
[300]	valid_0's l2: 0.355427	valid_0's total_error: 0.974117
[400]	valid_0's l2: 0.356096	valid_0's total_error: 0.985563
[500]	valid_0's l2: 0.358831	valid_0's total_error: 0.999629
[600]	valid_0's l2: 0.361454	valid_0's total_error: 1.00647
[700]	valid_0's l2: 0.36488	valid_0's total_error: 1.00696
[800]	valid_0's l2: 0.362132	valid_0's total_error: 1.0014
[900]	valid_0's l2: 0.362712	valid_0's total_error: 1.00161
[1000]	valid_0's l2: 0.364753	valid_0's total_error: 1.01654
[1100]	valid_0's l2: 0.365324	valid_0's total_error: 1.02153
[1200]	valid_0's l2: 0.363745	valid_0's total_error: 1.01631
[1300]	valid_0's l2: 0.364088	valid_0's total_error: 1.01571
[1400]	valid_0's l2: 0.362464	valid_0's total_error: 1.01563
[1500]	valid_0's l2: 0.36319	valid_0's total_error: 1.01395
Step TrainModelStep completed in 178.03 seconds
Executing step: PredictStep
Step PredictStep 

  df['base_prediction'] = (


Step InverseResidualTargetStep completed in 7.81 seconds
Executing step: InverseScalePredictionsStep
Step InverseScalePredictionsStep completed in 0.09 seconds
Executing step: IntegratePredictionsStep
Step IntegratePredictionsStep completed in 0.13 seconds
Executing step: EvaluatePredictionsSteps
Error en test: 0.4132

Top 5 productos con mayor error absoluto:
   product_id      target  predictions     tn_real     tn_pred  error_absoluto
0       20013  191.958297   383.506547  191.958297  383.506547      191.548250
4       20057   76.759070   144.860580   76.759070  144.860580       68.101510
3       20053  101.155097   146.469255  101.155097  146.469255       45.314158
5       20063   86.059974   126.762097   86.059974  126.762097       40.702124
2       20042  119.015616   157.020070  119.015616  157.020070       38.004455
Step EvaluatePredictionsSteps completed in 0.17 seconds
Executing step: EvaluatePredictionsOptimizatedSteps
alfa optimizado: 0.6714
Error en train: 0.2653
Error en

  has_large_values = (abs_vals > 1e6).any()
  has_large_values = (abs_vals > 1e6).any()


[100]	valid_0's l2: 0.738014	valid_0's total_error: 0.804745
[200]	valid_0's l2: 0.755152	valid_0's total_error: 0.73807
[300]	valid_0's l2: 0.751602	valid_0's total_error: 0.766467
[400]	valid_0's l2: 0.760972	valid_0's total_error: 0.789234
[500]	valid_0's l2: 0.763277	valid_0's total_error: 0.796728
[600]	valid_0's l2: 0.762212	valid_0's total_error: 0.74349
[700]	valid_0's l2: 0.754087	valid_0's total_error: 0.794115
[800]	valid_0's l2: 0.746371	valid_0's total_error: 0.804693
[900]	valid_0's l2: 0.748083	valid_0's total_error: 0.763103
[1000]	valid_0's l2: 0.75908	valid_0's total_error: 0.730448
[1100]	valid_0's l2: 0.755455	valid_0's total_error: 0.72782
[1200]	valid_0's l2: 0.752718	valid_0's total_error: 0.723851
[1300]	valid_0's l2: 0.749085	valid_0's total_error: 0.712164
[1400]	valid_0's l2: 0.752872	valid_0's total_error: 0.727795
[1500]	valid_0's l2: 0.751786	valid_0's total_error: 0.740731
Step TrainModelStep completed in 37.03 seconds
Executing step: PredictStep
Step Pre

  df['base_prediction'] = (


Step InverseResidualTargetStep completed in 0.99 seconds
Executing step: InverseScalePredictionsStep
Step InverseScalePredictionsStep completed in 0.01 seconds
Executing step: IntegratePredictionsStep
Step IntegratePredictionsStep completed in 0.02 seconds
Executing step: EvaluatePredictionsSteps
Error en test: 0.0670

Top 5 productos con mayor error absoluto:
   product_id      target  predictions     tn_real     tn_pred  error_absoluto
1       20100  122.506865   107.571330  122.506865  107.571330       14.935535
2       20211   23.234500    29.048703   23.234500   29.048703        5.814203
0       20023  262.634825   257.876754  262.634825  257.876754        4.758072
4       20684    2.354284     4.227180    2.354284    4.227180        1.872896
3       20375   11.554384    12.456332   11.554384   12.456332        0.901948
Step EvaluatePredictionsSteps completed in 0.02 seconds
Executing step: EvaluatePredictionsOptimizatedSteps
alfa optimizado: 0.8071
Error en train: 0.0168
Error en

  has_large_values = (abs_vals > 1e6).any()
  has_large_values = (abs_vals > 1e6).any()


[100]	valid_0's l2: 24.3906	valid_0's total_error: 0.722313
[200]	valid_0's l2: 24.5582	valid_0's total_error: 0.727095
[300]	valid_0's l2: 24.6964	valid_0's total_error: 0.788791
[400]	valid_0's l2: 24.8766	valid_0's total_error: 0.76043
[500]	valid_0's l2: 24.7906	valid_0's total_error: 0.74357
[600]	valid_0's l2: 24.9466	valid_0's total_error: 0.764184
[700]	valid_0's l2: 24.9268	valid_0's total_error: 0.784256
[800]	valid_0's l2: 25.0969	valid_0's total_error: 0.829174
[900]	valid_0's l2: 25.2416	valid_0's total_error: 0.866242
[1000]	valid_0's l2: 25.2487	valid_0's total_error: 0.8737
[1100]	valid_0's l2: 25.3081	valid_0's total_error: 0.867626
[1200]	valid_0's l2: 25.4477	valid_0's total_error: 0.890444
[1300]	valid_0's l2: 25.4841	valid_0's total_error: 0.899465
[1400]	valid_0's l2: 25.4325	valid_0's total_error: 0.895318
[1500]	valid_0's l2: 25.5421	valid_0's total_error: 0.941065
Step TrainModelStep completed in 16.44 seconds
Executing step: PredictStep
Step PredictStep comple

  df['base_prediction'] = (


Step InverseResidualTargetStep completed in 0.35 seconds
Executing step: InverseScalePredictionsStep
Step InverseScalePredictionsStep completed in 0.01 seconds
Executing step: IntegratePredictionsStep
Step IntegratePredictionsStep completed in 0.01 seconds
Executing step: EvaluatePredictionsSteps
Error en test: 0.3614

Top 5 productos con mayor error absoluto:
   product_id      target  predictions     tn_real     tn_pred  error_absoluto
0       20032  703.110658   448.252520  703.110658  448.252520      254.858138
1       20089  287.991140   184.650549  287.991140  184.650549      103.340591
Step EvaluatePredictionsSteps completed in 0.01 seconds
Executing step: EvaluatePredictionsOptimizatedSteps
alfa optimizado: 1.5240
Error en train: 0.0634
Error en test: 0.0899

Top 5 productos con mayor error absoluto en train:
   product_id      target  predictions     tn_real     tn_pred  error_absoluto
1       20089  169.876016   138.582678  169.876016  138.582678    3.129334e+01
0       20032

  has_large_values = (abs_vals > 1e6).any()
  has_large_values = (abs_vals > 1e6).any()


[100]	valid_0's l2: 0.0388946	valid_0's total_error: 0.884617
[200]	valid_0's l2: 0.037131	valid_0's total_error: 0.8261
[300]	valid_0's l2: 0.0367619	valid_0's total_error: 0.797467
[400]	valid_0's l2: 0.0369373	valid_0's total_error: 0.772303
[500]	valid_0's l2: 0.0367598	valid_0's total_error: 0.74808
[600]	valid_0's l2: 0.0365022	valid_0's total_error: 0.727548
[700]	valid_0's l2: 0.0366518	valid_0's total_error: 0.71888
[800]	valid_0's l2: 0.0371569	valid_0's total_error: 0.714669
[900]	valid_0's l2: 0.0374387	valid_0's total_error: 0.706781
[1000]	valid_0's l2: 0.0373974	valid_0's total_error: 0.709559
[1100]	valid_0's l2: 0.037464	valid_0's total_error: 0.713299
[1200]	valid_0's l2: 0.0380174	valid_0's total_error: 0.716508
[1300]	valid_0's l2: 0.0387801	valid_0's total_error: 0.7253
[1400]	valid_0's l2: 0.0386952	valid_0's total_error: 0.722189
[1500]	valid_0's l2: 0.038824	valid_0's total_error: 0.718076
Step TrainModelStep completed in 238.97 seconds
Executing step: PredictSt

  df['base_prediction'] = (


Step InverseResidualTargetStep completed in 11.02 seconds
Executing step: InverseScalePredictionsStep
Step InverseScalePredictionsStep completed in 0.12 seconds
Executing step: IntegratePredictionsStep
Step IntegratePredictionsStep completed in 0.19 seconds
Executing step: EvaluatePredictionsSteps
Error en test: 0.5205

Top 5 productos con mayor error absoluto:
   product_id      target  predictions     tn_real     tn_pred  error_absoluto
0       20037  101.306992   150.935746  101.306992  150.935746       49.628753
2       20077   64.078992   102.063124   64.078992  102.063124       37.984132
3       20079   41.167674    79.137845   41.167674   79.137845       37.970171
4       20090   42.831797    76.738673   42.831797   76.738673       33.906876
7       20216   13.266164    29.690588   13.266164   29.690588       16.424424
Step EvaluatePredictionsSteps completed in 0.19 seconds
Executing step: EvaluatePredictionsOptimizatedSteps
alfa optimizado: 0.7648
Error en train: 0.2692
Error e

  has_large_values = (abs_vals > 1e6).any()
  has_large_values = (abs_vals > 1e6).any()


[100]	valid_0's l2: 0.327331	valid_0's total_error: 1.10297
[200]	valid_0's l2: 0.341522	valid_0's total_error: 1.21351
[300]	valid_0's l2: 0.345992	valid_0's total_error: 1.26655
[400]	valid_0's l2: 0.342925	valid_0's total_error: 1.29345
[500]	valid_0's l2: 0.343781	valid_0's total_error: 1.31257
[600]	valid_0's l2: 0.345007	valid_0's total_error: 1.33622
[700]	valid_0's l2: 0.341674	valid_0's total_error: 1.33422
[800]	valid_0's l2: 0.34097	valid_0's total_error: 1.33202
[900]	valid_0's l2: 0.338622	valid_0's total_error: 1.34246
[1000]	valid_0's l2: 0.340343	valid_0's total_error: 1.35605
[1100]	valid_0's l2: 0.339294	valid_0's total_error: 1.34249
[1200]	valid_0's l2: 0.338505	valid_0's total_error: 1.34402
[1300]	valid_0's l2: 0.339976	valid_0's total_error: 1.35134
[1400]	valid_0's l2: 0.340602	valid_0's total_error: 1.35741
[1500]	valid_0's l2: 0.34239	valid_0's total_error: 1.36622
Step TrainModelStep completed in 78.11 seconds
Executing step: PredictStep
Step PredictStep comp

  df['base_prediction'] = (


Step InverseResidualTargetStep completed in 3.28 seconds
Executing step: InverseScalePredictionsStep
Step InverseScalePredictionsStep completed in 0.04 seconds
Executing step: IntegratePredictionsStep
Step IntegratePredictionsStep completed in 0.05 seconds
Executing step: EvaluatePredictionsSteps
Error en test: 0.7200

Top 5 productos con mayor error absoluto:
   product_id      target  predictions     tn_real     tn_pred  error_absoluto
0       20044   67.952581   179.394719   67.952581  179.394719      111.442138
9       20143   31.294566   114.097708   31.294566  114.097708       82.803143
2       20054   70.357692   134.914240   70.357692  134.914240       64.556548
1       20047  113.462266   172.625634  113.462266  172.625634       59.163367
8       20130   51.066470   107.666449   51.066470  107.666449       56.599980
Step EvaluatePredictionsSteps completed in 0.05 seconds
Executing step: EvaluatePredictionsOptimizatedSteps
alfa optimizado: 0.6082
Error en train: 0.2376
Error en

  has_large_values = (abs_vals > 1e6).any()
  has_large_values = (abs_vals > 1e6).any()


[100]	valid_0's l2: 0.270957	valid_0's total_error: 0.559503
[200]	valid_0's l2: 0.267189	valid_0's total_error: 0.828419
[300]	valid_0's l2: 0.27834	valid_0's total_error: 1.01626
[400]	valid_0's l2: 0.285337	valid_0's total_error: 1.08388
[500]	valid_0's l2: 0.291719	valid_0's total_error: 1.16803
[600]	valid_0's l2: 0.297376	valid_0's total_error: 1.20655
[700]	valid_0's l2: 0.299963	valid_0's total_error: 1.19242
[800]	valid_0's l2: 0.303369	valid_0's total_error: 1.21591
[900]	valid_0's l2: 0.304934	valid_0's total_error: 1.18399
[1000]	valid_0's l2: 0.308372	valid_0's total_error: 1.19604
[1100]	valid_0's l2: 0.313216	valid_0's total_error: 1.19661
[1200]	valid_0's l2: 0.321179	valid_0's total_error: 1.24965
[1300]	valid_0's l2: 0.324751	valid_0's total_error: 1.27843
[1400]	valid_0's l2: 0.330685	valid_0's total_error: 1.27749
[1500]	valid_0's l2: 0.330043	valid_0's total_error: 1.26942
Step TrainModelStep completed in 51.91 seconds
Executing step: PredictStep
Step PredictStep c

  df['base_prediction'] = (


Step InverseResidualTargetStep completed in 1.29 seconds
Executing step: InverseScalePredictionsStep
Step InverseScalePredictionsStep completed in 0.02 seconds
Executing step: IntegratePredictionsStep
Step IntegratePredictionsStep completed in 0.03 seconds
Executing step: EvaluatePredictionsSteps
Error en test: 0.2782

Top 5 productos con mayor error absoluto:
   product_id      target  predictions     tn_real    tn_pred  error_absoluto
1       20056  116.210440    85.905031  116.210440  85.905031       30.305409
0       20055  113.078862    94.109993  113.078862  94.109993       18.968869
3       20473   14.563774     5.783428   14.563774   5.783428        8.780346
2       20304    4.543392    11.302317    4.543392  11.302317        6.758924
8       20724    0.216113     2.256452    0.216113   2.256452        2.040339
Step EvaluatePredictionsSteps completed in 0.04 seconds
Executing step: EvaluatePredictionsOptimizatedSteps
alfa optimizado: 1.4423
Error en train: 0.0940
Error en test:

  has_large_values = (abs_vals > 1e6).any()
  has_large_values = (abs_vals > 1e6).any()


[100]	valid_0's l2: 0.0342295	valid_0's total_error: 0.96028
[200]	valid_0's l2: 0.0350739	valid_0's total_error: 1.04892
[300]	valid_0's l2: 0.0361262	valid_0's total_error: 1.13189
[400]	valid_0's l2: 0.0371863	valid_0's total_error: 1.1919
[500]	valid_0's l2: 0.038341	valid_0's total_error: 1.24137
[600]	valid_0's l2: 0.0392012	valid_0's total_error: 1.26437
[700]	valid_0's l2: 0.0397157	valid_0's total_error: 1.2836
[800]	valid_0's l2: 0.0400949	valid_0's total_error: 1.308
[900]	valid_0's l2: 0.0406414	valid_0's total_error: 1.32295
[1000]	valid_0's l2: 0.0411068	valid_0's total_error: 1.33074
[1100]	valid_0's l2: 0.0415706	valid_0's total_error: 1.34787
[1200]	valid_0's l2: 0.041643	valid_0's total_error: 1.35217
[1300]	valid_0's l2: 0.041684	valid_0's total_error: 1.35998
[1400]	valid_0's l2: 0.0418406	valid_0's total_error: 1.35869
[1500]	valid_0's l2: 0.04191	valid_0's total_error: 1.36149
Step TrainModelStep completed in 237.93 seconds
Executing step: PredictStep
Step Predict

  df['base_prediction'] = (


Step InverseResidualTargetStep completed in 12.43 seconds
Executing step: InverseScalePredictionsStep
Step InverseScalePredictionsStep completed in 0.12 seconds
Executing step: IntegratePredictionsStep
Step IntegratePredictionsStep completed in 0.18 seconds
Executing step: EvaluatePredictionsSteps
Error en test: 0.3114

Top 5 productos con mayor error absoluto:
    product_id      target  predictions     tn_real     tn_pred  \
0        20058  128.549485   101.906535  128.549485  101.906535   
1        20122   52.858464    68.304929   52.858464   68.304929   
34       20488    6.655384    16.700451    6.655384   16.700451   
2        20189   27.528275    36.919336   27.528275   36.919336   
9        20298   14.840791    23.782788   14.840791   23.782788   

    error_absoluto  
0        26.642951  
1        15.446466  
34       10.045066  
2         9.391060  
9         8.941997  
Step EvaluatePredictionsSteps completed in 0.18 seconds
Executing step: EvaluatePredictionsOptimizatedSteps

  has_large_values = (abs_vals > 1e6).any()
  has_large_values = (abs_vals > 1e6).any()


[100]	valid_0's l2: 0.0441797	valid_0's total_error: 0.720616
[200]	valid_0's l2: 0.0418502	valid_0's total_error: 0.601275
[300]	valid_0's l2: 0.0411375	valid_0's total_error: 0.578569
[400]	valid_0's l2: 0.0408508	valid_0's total_error: 0.579363
[500]	valid_0's l2: 0.0407256	valid_0's total_error: 0.585354
[600]	valid_0's l2: 0.0408852	valid_0's total_error: 0.579537
[700]	valid_0's l2: 0.040814	valid_0's total_error: 0.568244
[800]	valid_0's l2: 0.0407599	valid_0's total_error: 0.550668
[900]	valid_0's l2: 0.0410226	valid_0's total_error: 0.550581
[1000]	valid_0's l2: 0.0418442	valid_0's total_error: 0.551934
[1100]	valid_0's l2: 0.042282	valid_0's total_error: 0.561801
[1200]	valid_0's l2: 0.0426001	valid_0's total_error: 0.574994
[1300]	valid_0's l2: 0.0424489	valid_0's total_error: 0.578893
[1400]	valid_0's l2: 0.042348	valid_0's total_error: 0.578508
[1500]	valid_0's l2: 0.0424101	valid_0's total_error: 0.573753
Step TrainModelStep completed in 54.83 seconds
Executing step: Pred

  df['base_prediction'] = (


Step InverseResidualTargetStep completed in 1.96 seconds
Executing step: InverseScalePredictionsStep
Step InverseScalePredictionsStep completed in 0.03 seconds
Executing step: IntegratePredictionsStep
Step IntegratePredictionsStep completed in 0.04 seconds
Executing step: EvaluatePredictionsSteps
Error en test: 0.3175

Top 5 productos con mayor error absoluto:
   product_id     target  predictions    tn_real    tn_pred  error_absoluto
2       20138  19.238169    41.570625  19.238169  41.570625       22.332456
0       20062  79.208416    92.263418  79.208416  92.263418       13.055002
1       20117  37.192701    42.877253  37.192701  42.877253        5.684552
5       20491   9.630635    14.315953   9.630635  14.315953        4.685319
7       20757   2.978947     7.248350   2.978947   7.248350        4.269404
Step EvaluatePredictionsSteps completed in 0.04 seconds
Executing step: EvaluatePredictionsOptimizatedSteps
alfa optimizado: 0.8546
Error en train: 0.1236
Error en test: 0.2305

Top

  has_large_values = (abs_vals > 1e6).any()
  has_large_values = (abs_vals > 1e6).any()


[100]	valid_0's l2: 0.0539981	valid_0's total_error: 0.991097
[200]	valid_0's l2: 0.0545168	valid_0's total_error: 1.00127
[300]	valid_0's l2: 0.0548312	valid_0's total_error: 1.0065
[400]	valid_0's l2: 0.0551545	valid_0's total_error: 1.01081
[500]	valid_0's l2: 0.0555296	valid_0's total_error: 1.01283
[600]	valid_0's l2: 0.0559312	valid_0's total_error: 1.01593
[700]	valid_0's l2: 0.0559738	valid_0's total_error: 1.01708
[800]	valid_0's l2: 0.0563027	valid_0's total_error: 1.01527
[900]	valid_0's l2: 0.0564714	valid_0's total_error: 1.01928
[1000]	valid_0's l2: 0.0567362	valid_0's total_error: 1.01755
[1100]	valid_0's l2: 0.0569113	valid_0's total_error: 1.0201
[1200]	valid_0's l2: 0.0573418	valid_0's total_error: 1.02102
[1300]	valid_0's l2: 0.0573633	valid_0's total_error: 1.02307
[1400]	valid_0's l2: 0.0574555	valid_0's total_error: 1.02162
[1500]	valid_0's l2: 0.0575475	valid_0's total_error: 1.02041
Step TrainModelStep completed in 196.60 seconds
Executing step: PredictStep
Step

  df['base_prediction'] = (


Step InverseResidualTargetStep completed in 9.09 seconds
Executing step: InverseScalePredictionsStep
Step InverseScalePredictionsStep completed in 0.10 seconds
Executing step: IntegratePredictionsStep
Step IntegratePredictionsStep completed in 0.14 seconds
Executing step: EvaluatePredictionsSteps
Error en test: 0.3050

Top 5 productos con mayor error absoluto:
    product_id     target  predictions    tn_real    tn_pred  error_absoluto
7        20125  85.396235    55.391634  85.396235  55.391634       30.004601
4        20111  51.954616    74.748391  51.954616  74.748391       22.793774
9        20133  68.298365    50.587505  68.298365  50.587505       17.710860
0        20081  66.781934    84.283399  66.781934  84.283399       17.501466
12       20140  63.781709    47.986685  63.781709  47.986685       15.795024
Step EvaluatePredictionsSteps completed in 0.17 seconds
Executing step: EvaluatePredictionsOptimizatedSteps
alfa optimizado: 0.9280
Error en train: 0.3332
Error en test: 0.290

  has_large_values = (abs_vals > 1e6).any()
  has_large_values = (abs_vals > 1e6).any()


[100]	valid_0's l2: 0.439231	valid_0's total_error: 0.81697
[200]	valid_0's l2: 0.44507	valid_0's total_error: 0.785918
[300]	valid_0's l2: 0.443729	valid_0's total_error: 0.78324
[400]	valid_0's l2: 0.44599	valid_0's total_error: 0.779276
[500]	valid_0's l2: 0.45362	valid_0's total_error: 0.759467
[600]	valid_0's l2: 0.454565	valid_0's total_error: 0.755068
[700]	valid_0's l2: 0.455507	valid_0's total_error: 0.757518
[800]	valid_0's l2: 0.461013	valid_0's total_error: 0.750383
[900]	valid_0's l2: 0.461196	valid_0's total_error: 0.756867
[1000]	valid_0's l2: 0.465534	valid_0's total_error: 0.756453
[1100]	valid_0's l2: 0.46985	valid_0's total_error: 0.749957
[1200]	valid_0's l2: 0.471509	valid_0's total_error: 0.753936
[1300]	valid_0's l2: 0.464994	valid_0's total_error: 0.766727
[1400]	valid_0's l2: 0.467518	valid_0's total_error: 0.76098
[1500]	valid_0's l2: 0.468711	valid_0's total_error: 0.754989
Step TrainModelStep completed in 37.62 seconds
Executing step: PredictStep
Step Predic

  has_large_values = (abs_vals > 1e6).any()
  has_large_values = (abs_vals > 1e6).any()


[100]	valid_0's l2: 0.0579537	valid_0's total_error: 1.0291
[200]	valid_0's l2: 0.0614298	valid_0's total_error: 1.06779
[300]	valid_0's l2: 0.0614503	valid_0's total_error: 1.06065
[400]	valid_0's l2: 0.0675828	valid_0's total_error: 1.07479
[500]	valid_0's l2: 0.0716138	valid_0's total_error: 1.08403
[600]	valid_0's l2: 0.0742974	valid_0's total_error: 1.05848
[700]	valid_0's l2: 0.0764414	valid_0's total_error: 1.06192
[800]	valid_0's l2: 0.0789263	valid_0's total_error: 1.04834
[900]	valid_0's l2: 0.0806651	valid_0's total_error: 1.02526
[1000]	valid_0's l2: 0.0832988	valid_0's total_error: 1.02528
[1100]	valid_0's l2: 0.0842514	valid_0's total_error: 1.01769
[1200]	valid_0's l2: 0.086833	valid_0's total_error: 1.00786
[1300]	valid_0's l2: 0.0888397	valid_0's total_error: 0.996085
[1400]	valid_0's l2: 0.089156	valid_0's total_error: 0.995261
[1500]	valid_0's l2: 0.0908915	valid_0's total_error: 0.980483
Step TrainModelStep completed in 30.98 seconds
Executing step: PredictStep
Step

  df['base_prediction'] = (


Step InverseResidualTargetStep completed in 0.87 seconds
Executing step: InverseScalePredictionsStep
Step InverseScalePredictionsStep completed in 0.01 seconds
Executing step: IntegratePredictionsStep
Step IntegratePredictionsStep completed in 0.01 seconds
Executing step: EvaluatePredictionsSteps
Error en test: 0.6532

Top 5 productos con mayor error absoluto:
   product_id     target  predictions    tn_real    tn_pred  error_absoluto
0       20086  45.602805    79.012338  45.602805  79.012338       33.409534
1       20096  51.768200    81.513957  51.768200  81.513957       29.745757
2       20558  10.992732    16.801206  10.992732  16.801206        5.808474
3       20689   6.166529    10.962184   6.166529  10.962184        4.795655
4       20817   4.754316     8.563429   4.754316   8.563429        3.809113
Step EvaluatePredictionsSteps completed in 0.02 seconds
Executing step: EvaluatePredictionsOptimizatedSteps
alfa optimizado: 0.5532
Error en train: 0.1033
Error en test: 0.2008

Top

  has_large_values = (abs_vals > 1e6).any()
  has_large_values = (abs_vals > 1e6).any()


[100]	valid_0's l2: 0.16905	valid_0's total_error: 1.08794
[200]	valid_0's l2: 0.174786	valid_0's total_error: 1.08331
[300]	valid_0's l2: 0.177319	valid_0's total_error: 1.0731
[400]	valid_0's l2: 0.179927	valid_0's total_error: 1.0556
[500]	valid_0's l2: 0.181132	valid_0's total_error: 1.06775
[600]	valid_0's l2: 0.181473	valid_0's total_error: 1.07833
[700]	valid_0's l2: 0.182879	valid_0's total_error: 1.08569
[800]	valid_0's l2: 0.184539	valid_0's total_error: 1.09992
[900]	valid_0's l2: 0.186075	valid_0's total_error: 1.10621
[1000]	valid_0's l2: 0.18678	valid_0's total_error: 1.11025
[1100]	valid_0's l2: 0.186339	valid_0's total_error: 1.11859
[1200]	valid_0's l2: 0.187228	valid_0's total_error: 1.11639
[1300]	valid_0's l2: 0.188922	valid_0's total_error: 1.13413
[1400]	valid_0's l2: 0.189548	valid_0's total_error: 1.1384
[1500]	valid_0's l2: 0.190962	valid_0's total_error: 1.14497
Step TrainModelStep completed in 31.52 seconds
Executing step: PredictStep
Step PredictStep complet

  df['base_prediction'] = (


Step InverseResidualTargetStep completed in 0.88 seconds
Executing step: InverseScalePredictionsStep
Step InverseScalePredictionsStep completed in 0.02 seconds
Executing step: IntegratePredictionsStep
Step IntegratePredictionsStep completed in 0.02 seconds
Executing step: EvaluatePredictionsSteps
Error en test: 0.2825

Top 5 productos con mayor error absoluto:
   product_id     target  predictions    tn_real    tn_pred  error_absoluto
2       20175  22.958350    46.067781  22.958350  46.067781       23.109431
4       20299  42.858681    23.067616  42.858681  23.067616       19.791065
0       20107  90.783370    79.555848  90.783370  79.555848       11.227522
3       20270  14.757801    23.590270  14.757801  23.590270        8.832469
1       20161  57.782578    53.222361  57.782578  53.222361        4.560216
Step EvaluatePredictionsSteps completed in 0.02 seconds
Executing step: EvaluatePredictionsOptimizatedSteps
alfa optimizado: 1.2333
Error en train: 0.3598
Error en test: 0.3738

Top

  has_large_values = (abs_vals > 1e6).any()
  has_large_values = (abs_vals > 1e6).any()


[100]	valid_0's l2: 0.0402019	valid_0's total_error: 1.46799
[200]	valid_0's l2: 0.0411673	valid_0's total_error: 1.50909
[300]	valid_0's l2: 0.0432883	valid_0's total_error: 1.5876
[400]	valid_0's l2: 0.0442877	valid_0's total_error: 1.61862
[500]	valid_0's l2: 0.0462439	valid_0's total_error: 1.66562
[600]	valid_0's l2: 0.0463821	valid_0's total_error: 1.6921
[700]	valid_0's l2: 0.0461759	valid_0's total_error: 1.70494
[800]	valid_0's l2: 0.0469589	valid_0's total_error: 1.72022
[900]	valid_0's l2: 0.0487335	valid_0's total_error: 1.76954
[1000]	valid_0's l2: 0.0486576	valid_0's total_error: 1.76012
[1100]	valid_0's l2: 0.0488064	valid_0's total_error: 1.77038
[1200]	valid_0's l2: 0.0499164	valid_0's total_error: 1.79948
[1300]	valid_0's l2: 0.0502772	valid_0's total_error: 1.78613
[1400]	valid_0's l2: 0.0506129	valid_0's total_error: 1.78203
[1500]	valid_0's l2: 0.050778	valid_0's total_error: 1.78017
Step TrainModelStep completed in 37.48 seconds
Executing step: PredictStep
Step Pr

  df['base_prediction'] = (


Step InverseResidualTargetStep completed in 0.76 seconds
Executing step: InverseScalePredictionsStep
Step InverseScalePredictionsStep completed in 0.01 seconds
Executing step: IntegratePredictionsStep
Step IntegratePredictionsStep completed in 0.01 seconds
Executing step: EvaluatePredictionsSteps
Error en test: 0.7648

Top 5 productos con mayor error absoluto:
   product_id     target  predictions    tn_real    tn_pred  error_absoluto
0       20142  49.201926    77.764030  49.201926  77.764030       28.562103
2       20259  12.400438    27.937460  12.400438  27.937460       15.537022
1       20240  18.321323    33.625657  18.321323  33.625657       15.304334
3       20285  14.882624    25.795932  14.882624  25.795932       10.913307
4       20565   4.947940    10.927624   4.947940  10.927624        5.979685
Step EvaluatePredictionsSteps completed in 0.02 seconds
Executing step: EvaluatePredictionsOptimizatedSteps
alfa optimizado: 0.4531
Error en train: 0.1339
Error en test: 0.3351

Top

  has_large_values = (abs_vals > 1e6).any()
  has_large_values = (abs_vals > 1e6).any()


[100]	valid_0's l2: 0.113138	valid_0's total_error: 0.940233
[200]	valid_0's l2: 0.115619	valid_0's total_error: 0.996549
[300]	valid_0's l2: 0.116916	valid_0's total_error: 1.02694
[400]	valid_0's l2: 0.11835	valid_0's total_error: 1.05052
[500]	valid_0's l2: 0.119637	valid_0's total_error: 1.05743
[600]	valid_0's l2: 0.120105	valid_0's total_error: 1.06161
[700]	valid_0's l2: 0.121543	valid_0's total_error: 1.06158
[800]	valid_0's l2: 0.121654	valid_0's total_error: 1.06154
[900]	valid_0's l2: 0.122247	valid_0's total_error: 1.05372
[1000]	valid_0's l2: 0.122334	valid_0's total_error: 1.0541
[1100]	valid_0's l2: 0.122646	valid_0's total_error: 1.05946
[1200]	valid_0's l2: 0.123397	valid_0's total_error: 1.06614
[1300]	valid_0's l2: 0.123591	valid_0's total_error: 1.07507
[1400]	valid_0's l2: 0.123895	valid_0's total_error: 1.07997
[1500]	valid_0's l2: 0.124535	valid_0's total_error: 1.09631
Step TrainModelStep completed in 211.28 seconds
Executing step: PredictStep
Step PredictStep c

  df['base_prediction'] = (


Step InverseResidualTargetStep completed in 9.37 seconds
Executing step: InverseScalePredictionsStep
Step InverseScalePredictionsStep completed in 0.11 seconds
Executing step: IntegratePredictionsStep
Step IntegratePredictionsStep completed in 0.22 seconds
Executing step: EvaluatePredictionsSteps
Error en test: 0.2681

Top 5 productos con mayor error absoluto:
    product_id     target  predictions    tn_real    tn_pred  error_absoluto
50       20364  36.096722    21.366981  36.096722  21.366981       14.729741
54       20389  35.981108    21.712443  35.981108  21.712443       14.268665
48       20353   0.702694    14.287284   0.702694  14.287284       13.584590
5        20176  24.777779    36.074002  24.777779  36.074002       11.296223
9        20182  25.452783    35.557282  25.452783  35.557282       10.104499
Step EvaluatePredictionsSteps completed in 0.15 seconds
Executing step: EvaluatePredictionsOptimizatedSteps
alfa optimizado: 0.8214
Error en train: 0.2978
Error en test: 0.320

  has_large_values = (abs_vals > 1e6).any()
  has_large_values = (abs_vals > 1e6).any()


[100]	valid_0's l2: 0.136966	valid_0's total_error: 0.944318
[200]	valid_0's l2: 0.133267	valid_0's total_error: 0.926898
[300]	valid_0's l2: 0.130733	valid_0's total_error: 0.887457
[400]	valid_0's l2: 0.130722	valid_0's total_error: 0.860602
[500]	valid_0's l2: 0.131384	valid_0's total_error: 0.811387
[600]	valid_0's l2: 0.130373	valid_0's total_error: 0.747087
[700]	valid_0's l2: 0.129427	valid_0's total_error: 0.732204
[800]	valid_0's l2: 0.131696	valid_0's total_error: 0.742118
[900]	valid_0's l2: 0.132479	valid_0's total_error: 0.709208
[1000]	valid_0's l2: 0.133254	valid_0's total_error: 0.69772
[1100]	valid_0's l2: 0.134208	valid_0's total_error: 0.733219
[1200]	valid_0's l2: 0.134132	valid_0's total_error: 0.680792
[1300]	valid_0's l2: 0.134266	valid_0's total_error: 0.653201
[1400]	valid_0's l2: 0.135962	valid_0's total_error: 0.621513
[1500]	valid_0's l2: 0.135825	valid_0's total_error: 0.607439
Step TrainModelStep completed in 19.82 seconds
Executing step: PredictStep
Step 

  df['base_prediction'] = (


Step InverseResidualTargetStep completed in 0.21 seconds
Executing step: InverseScalePredictionsStep
Step InverseScalePredictionsStep completed in 0.00 seconds
Executing step: IntegratePredictionsStep
Step IntegratePredictionsStep completed in 0.01 seconds
Executing step: EvaluatePredictionsSteps
Error en test: 0.1078

Top 5 productos con mayor error absoluto:
   product_id     target  predictions    tn_real    tn_pred  error_absoluto
0       20237  57.406896    52.453525  57.406896  52.453525        4.953371
1       20460  15.686462    18.611720  15.686462  18.611720        2.925258
Step EvaluatePredictionsSteps completed in 0.02 seconds
Executing step: EvaluatePredictionsOptimizatedSteps
alfa optimizado: 1.3185
Error en train: 0.0759
Error en test: 0.6405

Top 5 productos con mayor error absoluto en train:
   product_id     target  predictions    tn_real    tn_pred  error_absoluto
1       20460  10.093350    13.614525  10.093350  13.614525    3.521175e+00
0       20237  36.322471    

  has_large_values = (abs_vals > 1e6).any()
  has_large_values = (abs_vals > 1e6).any()


[100]	valid_0's l2: 0.0313641	valid_0's total_error: 0.904888
[200]	valid_0's l2: 0.0298187	valid_0's total_error: 0.819626
[300]	valid_0's l2: 0.0295542	valid_0's total_error: 0.782321
[400]	valid_0's l2: 0.0287566	valid_0's total_error: 0.799569
[500]	valid_0's l2: 0.0284249	valid_0's total_error: 0.806261
[600]	valid_0's l2: 0.0273429	valid_0's total_error: 0.74965
[700]	valid_0's l2: 0.0269979	valid_0's total_error: 0.739642
[800]	valid_0's l2: 0.0267369	valid_0's total_error: 0.767719
[900]	valid_0's l2: 0.0265945	valid_0's total_error: 0.746338
[1000]	valid_0's l2: 0.0261738	valid_0's total_error: 0.775
[1100]	valid_0's l2: 0.0264924	valid_0's total_error: 0.745416
[1200]	valid_0's l2: 0.0268722	valid_0's total_error: 0.76038
[1300]	valid_0's l2: 0.0262747	valid_0's total_error: 0.762272
[1400]	valid_0's l2: 0.0259839	valid_0's total_error: 0.768716
[1500]	valid_0's l2: 0.0258818	valid_0's total_error: 0.75602
Step TrainModelStep completed in 32.50 seconds
Executing step: Predict

  has_large_values = (abs_vals > 1e6).any()
  has_large_values = (abs_vals > 1e6).any()


[100]	valid_0's l2: 0.0076543	valid_0's total_error: 0.36901
[200]	valid_0's l2: 0.0077264	valid_0's total_error: 0.408171
[300]	valid_0's l2: 0.00772992	valid_0's total_error: 0.380156
[400]	valid_0's l2: 0.00791521	valid_0's total_error: 0.383808
[500]	valid_0's l2: 0.00799229	valid_0's total_error: 0.351687
[600]	valid_0's l2: 0.00811436	valid_0's total_error: 0.359852
[700]	valid_0's l2: 0.00813226	valid_0's total_error: 0.368894
[800]	valid_0's l2: 0.00822336	valid_0's total_error: 0.362033
[900]	valid_0's l2: 0.00830528	valid_0's total_error: 0.369777
[1000]	valid_0's l2: 0.00842684	valid_0's total_error: 0.387834
[1100]	valid_0's l2: 0.00850476	valid_0's total_error: 0.386719
[1200]	valid_0's l2: 0.00862724	valid_0's total_error: 0.374423
[1300]	valid_0's l2: 0.00857748	valid_0's total_error: 0.392297
[1400]	valid_0's l2: 0.00867909	valid_0's total_error: 0.385524
[1500]	valid_0's l2: 0.00877773	valid_0's total_error: 0.390318
Step TrainModelStep completed in 24.15 seconds
Execu

  has_large_values = (abs_vals > 1e6).any()
  has_large_values = (abs_vals > 1e6).any()


[100]	valid_0's l2: 0.00452379	valid_0's total_error: 0.729647
[200]	valid_0's l2: 0.00446246	valid_0's total_error: 0.58669
[300]	valid_0's l2: 0.00446953	valid_0's total_error: 0.517995
[400]	valid_0's l2: 0.0044774	valid_0's total_error: 0.50841
[500]	valid_0's l2: 0.00442494	valid_0's total_error: 0.362413
[600]	valid_0's l2: 0.00440747	valid_0's total_error: 0.254045
[700]	valid_0's l2: 0.0044096	valid_0's total_error: 0.230209
[800]	valid_0's l2: 0.00440541	valid_0's total_error: 0.187537
[900]	valid_0's l2: 0.00440467	valid_0's total_error: 0.16191
[1000]	valid_0's l2: 0.00440049	valid_0's total_error: 0.162995
[1100]	valid_0's l2: 0.00438869	valid_0's total_error: 0.187327
[1200]	valid_0's l2: 0.00438383	valid_0's total_error: 0.178034
[1300]	valid_0's l2: 0.00440372	valid_0's total_error: 0.143972
[1400]	valid_0's l2: 0.00441319	valid_0's total_error: 0.163434
[1500]	valid_0's l2: 0.00440603	valid_0's total_error: 0.228622
Step TrainModelStep completed in 17.98 seconds
Executi

  df['base_prediction'] = (


Step InverseResidualTargetStep completed in 0.30 seconds
Executing step: InverseScalePredictionsStep
Step InverseScalePredictionsStep completed in 0.01 seconds
Executing step: IntegratePredictionsStep
Step IntegratePredictionsStep completed in 0.01 seconds
Executing step: EvaluatePredictionsSteps
Error en test: 0.0594

Top 5 productos con mayor error absoluto:
   product_id    target  predictions   tn_real   tn_pred  error_absoluto
1       21022  1.133048     0.913102  1.133048  0.913102        0.219946
0       20928  5.391934     5.214101  5.391934  5.214101        0.177833
2       21056  1.356857     1.286173  1.356857  1.286173        0.070684
Step EvaluatePredictionsSteps completed in 0.01 seconds
Executing step: EvaluatePredictionsOptimizatedSteps
alfa optimizado: 0.7804
Error en train: 0.2648
Error en test: 0.3780

Top 5 productos con mayor error absoluto en train:
   product_id    target  predictions   tn_real   tn_pred  error_absoluto
2       21056  1.073334     0.468275  1.073

  has_large_values = (abs_vals > 1e6).any()
  has_large_values = (abs_vals > 1e6).any()


[100]	valid_0's l2: 0.00525071	valid_0's total_error: 0.869367
[200]	valid_0's l2: 0.00520155	valid_0's total_error: 0.858749
[300]	valid_0's l2: 0.0052058	valid_0's total_error: 0.857276
[400]	valid_0's l2: 0.00518161	valid_0's total_error: 0.865949
[500]	valid_0's l2: 0.00519643	valid_0's total_error: 0.874576
[600]	valid_0's l2: 0.00519593	valid_0's total_error: 0.884156
[700]	valid_0's l2: 0.00520072	valid_0's total_error: 0.885458
[800]	valid_0's l2: 0.00519408	valid_0's total_error: 0.887202
[900]	valid_0's l2: 0.00519972	valid_0's total_error: 0.89052
[1000]	valid_0's l2: 0.00520277	valid_0's total_error: 0.895018
[1100]	valid_0's l2: 0.00520353	valid_0's total_error: 0.897329
[1200]	valid_0's l2: 0.00522383	valid_0's total_error: 0.895847
[1300]	valid_0's l2: 0.00522378	valid_0's total_error: 0.9002
[1400]	valid_0's l2: 0.00523537	valid_0's total_error: 0.915942
[1500]	valid_0's l2: 0.0052319	valid_0's total_error: 0.917802
Step TrainModelStep completed in 67.97 seconds
Executi

  df['base_prediction'] = (


Step InverseResidualTargetStep completed in 2.49 seconds
Executing step: InverseScalePredictionsStep
Step InverseScalePredictionsStep completed in 0.03 seconds
Executing step: IntegratePredictionsStep
Step IntegratePredictionsStep completed in 0.04 seconds
Executing step: EvaluatePredictionsSteps
Error en test: 0.3956

Top 5 productos con mayor error absoluto:
   product_id     target  predictions    tn_real    tn_pred  error_absoluto
0       20263  11.042422    19.156691  11.042422  19.156691        8.114269
2       20321  18.421051    10.924209  18.421051  10.924209        7.496842
1       20267  12.365496    17.604494  12.365496  17.604494        5.238998
5       20481   6.040918     8.705036   6.040918   8.705036        2.664118
9       20599   2.269278     4.598238   2.269278   4.598238        2.328960
Step EvaluatePredictionsSteps completed in 0.04 seconds
Executing step: EvaluatePredictionsOptimizatedSteps
alfa optimizado: 0.7622
Error en train: 0.3085
Error en test: 0.3517

Top

  has_large_values = (abs_vals > 1e6).any()
  has_large_values = (abs_vals > 1e6).any()


[100]	valid_0's l2: 0.00706465	valid_0's total_error: 1.05517
[200]	valid_0's l2: 0.0072005	valid_0's total_error: 1.08473
[300]	valid_0's l2: 0.00724617	valid_0's total_error: 1.10683
[400]	valid_0's l2: 0.00724349	valid_0's total_error: 1.11174
[500]	valid_0's l2: 0.00725561	valid_0's total_error: 1.12047
[600]	valid_0's l2: 0.00722867	valid_0's total_error: 1.11558
[700]	valid_0's l2: 0.00725257	valid_0's total_error: 1.11855
[800]	valid_0's l2: 0.00725205	valid_0's total_error: 1.12306
[900]	valid_0's l2: 0.00726712	valid_0's total_error: 1.11884
[1000]	valid_0's l2: 0.00727026	valid_0's total_error: 1.1161
[1100]	valid_0's l2: 0.00728421	valid_0's total_error: 1.10787
[1200]	valid_0's l2: 0.00731853	valid_0's total_error: 1.10827
[1300]	valid_0's l2: 0.00730873	valid_0's total_error: 1.10885
[1400]	valid_0's l2: 0.00727407	valid_0's total_error: 1.10417
[1500]	valid_0's l2: 0.00730156	valid_0's total_error: 1.10097
Step TrainModelStep completed in 102.78 seconds
Executing step: Pr

  df['base_prediction'] = (


Step InverseResidualTargetStep completed in 3.62 seconds
Executing step: InverseScalePredictionsStep
Step InverseScalePredictionsStep completed in 0.05 seconds
Executing step: IntegratePredictionsStep
Step IntegratePredictionsStep completed in 0.05 seconds
Executing step: EvaluatePredictionsSteps
Error en test: 0.3108

Top 5 productos con mayor error absoluto:
   product_id     target  predictions    tn_real    tn_pred  error_absoluto
1       20352  19.210268    11.554743  19.210268  11.554743        7.655526
3       20380  15.992130     9.419215  15.992130   9.419215        6.572915
7       20438  14.224800     7.673587  14.224800   7.673587        6.551213
4       20381  15.162040     9.320012  15.162040   9.320012        5.842028
5       20404   5.656020    10.600095   5.656020  10.600095        4.944075
Step EvaluatePredictionsSteps completed in 0.06 seconds
Executing step: EvaluatePredictionsOptimizatedSteps
alfa optimizado: 0.9402
Error en train: 0.3293
Error en test: 0.3414

Top

  has_large_values = (abs_vals > 1e6).any()
  has_large_values = (abs_vals > 1e6).any()


[100]	valid_0's l2: 0.00182377	valid_0's total_error: 0.836585
[200]	valid_0's l2: 0.00182843	valid_0's total_error: 0.850135
[300]	valid_0's l2: 0.00185189	valid_0's total_error: 0.933473
[400]	valid_0's l2: 0.00191004	valid_0's total_error: 0.997483
[500]	valid_0's l2: 0.00195812	valid_0's total_error: 1.06252
[600]	valid_0's l2: 0.00200981	valid_0's total_error: 1.12575
[700]	valid_0's l2: 0.00206118	valid_0's total_error: 1.13628
[800]	valid_0's l2: 0.00210492	valid_0's total_error: 1.17062
[900]	valid_0's l2: 0.00210101	valid_0's total_error: 1.17389
[1000]	valid_0's l2: 0.00214427	valid_0's total_error: 1.2037
[1100]	valid_0's l2: 0.00214909	valid_0's total_error: 1.2112
[1200]	valid_0's l2: 0.00217976	valid_0's total_error: 1.22738
[1300]	valid_0's l2: 0.00218463	valid_0's total_error: 1.23028
[1400]	valid_0's l2: 0.0021827	valid_0's total_error: 1.23995
[1500]	valid_0's l2: 0.00220171	valid_0's total_error: 1.25129
Step TrainModelStep completed in 33.36 seconds
Executing step: 

  df['base_prediction'] = (


Step InverseResidualTargetStep completed in 1.10 seconds
Executing step: InverseScalePredictionsStep
Step InverseScalePredictionsStep completed in 0.02 seconds
Executing step: IntegratePredictionsStep
Step IntegratePredictionsStep completed in 0.02 seconds
Executing step: EvaluatePredictionsSteps
Error en test: 0.5997

Top 5 productos con mayor error absoluto:
   product_id    target  predictions   tn_real   tn_pred  error_absoluto
4       20667  2.550402     7.401281  2.550402  7.401281        4.850878
0       20424  6.704089     9.529806  6.704089  9.529806        2.825717
2       20484  4.573177     7.228927  4.573177  7.228927        2.655749
1       20432  5.829678     7.965596  5.829678  7.965596        2.135918
3       20505  5.395396     7.507859  5.395396  7.507859        2.112463
Step EvaluatePredictionsSteps completed in 0.03 seconds
Executing step: EvaluatePredictionsOptimizatedSteps
alfa optimizado: 0.6100
Error en train: 0.1947
Error en test: 0.2625

Top 5 productos con m

  has_large_values = (abs_vals > 1e6).any()
  has_large_values = (abs_vals > 1e6).any()


[100]	valid_0's l2: 0.000575216	valid_0's total_error: 1.10192
[200]	valid_0's l2: 0.000558929	valid_0's total_error: 1.09201
[300]	valid_0's l2: 0.000555042	valid_0's total_error: 1.06285
[400]	valid_0's l2: 0.000560401	valid_0's total_error: 1.05924
[500]	valid_0's l2: 0.000556533	valid_0's total_error: 1.03726
[600]	valid_0's l2: 0.000566896	valid_0's total_error: 1.02999
[700]	valid_0's l2: 0.000572228	valid_0's total_error: 1.0404
[800]	valid_0's l2: 0.000575267	valid_0's total_error: 1.03184
[900]	valid_0's l2: 0.000581475	valid_0's total_error: 1.02046
[1000]	valid_0's l2: 0.000581976	valid_0's total_error: 1.00997
[1100]	valid_0's l2: 0.000583461	valid_0's total_error: 0.99619
[1200]	valid_0's l2: 0.000585767	valid_0's total_error: 0.983814
[1300]	valid_0's l2: 0.000583156	valid_0's total_error: 0.964248
[1400]	valid_0's l2: 0.000589011	valid_0's total_error: 0.953161
[1500]	valid_0's l2: 0.000589746	valid_0's total_error: 0.956811
Step TrainModelStep completed in 94.64 seconds

  df['base_prediction'] = (


Step InverseResidualTargetStep completed in 3.69 seconds
Executing step: InverseScalePredictionsStep
Step InverseScalePredictionsStep completed in 0.04 seconds
Executing step: IntegratePredictionsStep
Step IntegratePredictionsStep completed in 0.07 seconds
Executing step: EvaluatePredictionsSteps
Error en test: 0.3201

Top 5 productos con mayor error absoluto:
    product_id    target  predictions   tn_real   tn_pred  error_absoluto
2        20549  2.796434     4.654667  2.796434  4.654667        1.858234
1        20538  3.949441     5.612597  3.949441  5.612597        1.663156
7        20714  1.502158     2.915160  1.502158  2.915160        1.413002
26       21163  0.222637     1.569384  0.222637  1.569384        1.346747
5        20693  1.807688     3.063042  1.807688  3.063042        1.255354
Step EvaluatePredictionsSteps completed in 0.06 seconds
Executing step: EvaluatePredictionsOptimizatedSteps
alfa optimizado: 0.6938
Error en train: 0.2928
Error en test: 0.3956

Top 5 productos

  has_large_values = (abs_vals > 1e6).any()
  has_large_values = (abs_vals > 1e6).any()


[100]	valid_0's l2: 0.00155566	valid_0's total_error: 0.618374
[200]	valid_0's l2: 0.00155964	valid_0's total_error: 0.4731
[300]	valid_0's l2: 0.00159531	valid_0's total_error: 0.477947
[400]	valid_0's l2: 0.00161015	valid_0's total_error: 0.501173
[500]	valid_0's l2: 0.00162753	valid_0's total_error: 0.582148
[600]	valid_0's l2: 0.00164981	valid_0's total_error: 0.661057
[700]	valid_0's l2: 0.00165771	valid_0's total_error: 0.733519
[800]	valid_0's l2: 0.00165345	valid_0's total_error: 0.820059
[900]	valid_0's l2: 0.00167044	valid_0's total_error: 0.959741
[1000]	valid_0's l2: 0.00167172	valid_0's total_error: 0.995746
[1100]	valid_0's l2: 0.00167826	valid_0's total_error: 1.00515
[1200]	valid_0's l2: 0.00169167	valid_0's total_error: 1.0398
[1300]	valid_0's l2: 0.00170309	valid_0's total_error: 1.08081
[1400]	valid_0's l2: 0.0017119	valid_0's total_error: 1.13744
[1500]	valid_0's l2: 0.00172291	valid_0's total_error: 1.16705
Step TrainModelStep completed in 34.45 seconds
Executing s

  df['base_prediction'] = (


Step InverseResidualTargetStep completed in 1.36 seconds
Executing step: InverseScalePredictionsStep
Step InverseScalePredictionsStep completed in 0.02 seconds
Executing step: IntegratePredictionsStep
Step IntegratePredictionsStep completed in 0.03 seconds
Executing step: EvaluatePredictionsSteps
Error en test: 0.3171

Top 5 productos con mayor error absoluto:
   product_id    target  predictions   tn_real   tn_pred  error_absoluto
1       20482  8.016826     5.089852  8.016826  5.089852        2.926974
4       20596  4.733230     2.080801  4.733230  2.080801        2.652429
0       20470  7.275974     5.875838  7.275974  5.875838        1.400136
3       20579  4.953015     3.688483  4.953015  3.688483        1.264532
2       20560  3.509262     2.645421  3.509262  2.645421        0.863841
Step EvaluatePredictionsSteps completed in 0.04 seconds
Executing step: EvaluatePredictionsOptimizatedSteps
alfa optimizado: 1.2931
Error en train: 0.1696
Error en test: 0.1961

Top 5 productos con m

  has_large_values = (abs_vals > 1e6).any()
  has_large_values = (abs_vals > 1e6).any()


[100]	valid_0's l2: 0.00111215	valid_0's total_error: 1.12428
[200]	valid_0's l2: 0.00113721	valid_0's total_error: 1.20049
[300]	valid_0's l2: 0.00114782	valid_0's total_error: 1.21141
[400]	valid_0's l2: 0.00117253	valid_0's total_error: 1.20144
[500]	valid_0's l2: 0.00117684	valid_0's total_error: 1.19885
[600]	valid_0's l2: 0.00117537	valid_0's total_error: 1.16778
[700]	valid_0's l2: 0.00119027	valid_0's total_error: 1.16641
[800]	valid_0's l2: 0.0011974	valid_0's total_error: 1.14795
[900]	valid_0's l2: 0.00120544	valid_0's total_error: 1.12946
[1000]	valid_0's l2: 0.00121917	valid_0's total_error: 1.12753
[1100]	valid_0's l2: 0.00122041	valid_0's total_error: 1.11423
[1200]	valid_0's l2: 0.00122347	valid_0's total_error: 1.09898
[1300]	valid_0's l2: 0.00122389	valid_0's total_error: 1.08072
[1400]	valid_0's l2: 0.00122983	valid_0's total_error: 1.06838
[1500]	valid_0's l2: 0.0012324	valid_0's total_error: 1.05861
Step TrainModelStep completed in 32.88 seconds
Executing step: Pre

  df['base_prediction'] = (


Step InverseResidualTargetStep completed in 1.00 seconds
Executing step: InverseScalePredictionsStep
Step InverseScalePredictionsStep completed in 0.02 seconds
Executing step: IntegratePredictionsStep
Step IntegratePredictionsStep completed in 0.02 seconds
Executing step: EvaluatePredictionsSteps
Error en test: 0.2707

Top 5 productos con mayor error absoluto:
   product_id    target  predictions   tn_real   tn_pred  error_absoluto
0       20517  6.609933     4.991617  6.609933  4.991617        1.618315
3       20676  1.515272     2.403248  1.515272  2.403248        0.887976
4       20702  1.579659     2.423989  1.579659  2.423989        0.844330
2       20644  3.055013     2.651312  3.055013  2.651312        0.403701
1       20568  3.988671     3.612524  3.988671  3.612524        0.376146
Step EvaluatePredictionsSteps completed in 0.03 seconds
Executing step: EvaluatePredictionsOptimizatedSteps
alfa optimizado: 1.0332
Error en train: 0.3479
Error en test: 0.1766

Top 5 productos con m

  has_large_values = (abs_vals > 1e6).any()
  has_large_values = (abs_vals > 1e6).any()


[100]	valid_0's l2: 0.000858063	valid_0's total_error: 0.989951
[200]	valid_0's l2: 0.000860667	valid_0's total_error: 0.942635
[300]	valid_0's l2: 0.000861267	valid_0's total_error: 0.898641
[400]	valid_0's l2: 0.000866672	valid_0's total_error: 0.883859
[500]	valid_0's l2: 0.000872027	valid_0's total_error: 0.863207
[600]	valid_0's l2: 0.00087684	valid_0's total_error: 0.81613
[700]	valid_0's l2: 0.000881251	valid_0's total_error: 0.822822
[800]	valid_0's l2: 0.000877419	valid_0's total_error: 0.820744
[900]	valid_0's l2: 0.000874253	valid_0's total_error: 0.821534
[1000]	valid_0's l2: 0.000876161	valid_0's total_error: 0.817364
[1100]	valid_0's l2: 0.000877416	valid_0's total_error: 0.845931
[1200]	valid_0's l2: 0.000878216	valid_0's total_error: 0.842699
[1300]	valid_0's l2: 0.000878542	valid_0's total_error: 0.844215
[1400]	valid_0's l2: 0.000880665	valid_0's total_error: 0.836715
[1500]	valid_0's l2: 0.000878385	valid_0's total_error: 0.834767
Step TrainModelStep completed in 53.

  has_large_values = (abs_vals > 1e6).any()
  has_large_values = (abs_vals > 1e6).any()


[100]	valid_0's l2: 0.000381846	valid_0's total_error: 0.843363
[200]	valid_0's l2: 0.000384871	valid_0's total_error: 0.80127
[300]	valid_0's l2: 0.00038741	valid_0's total_error: 0.792188
[400]	valid_0's l2: 0.000389186	valid_0's total_error: 0.782964
[500]	valid_0's l2: 0.000402301	valid_0's total_error: 0.788418
[600]	valid_0's l2: 0.000410319	valid_0's total_error: 0.805808
[700]	valid_0's l2: 0.000420335	valid_0's total_error: 0.819704
[800]	valid_0's l2: 0.000427608	valid_0's total_error: 0.821289
[900]	valid_0's l2: 0.000430466	valid_0's total_error: 0.812102
[1000]	valid_0's l2: 0.000432229	valid_0's total_error: 0.785506
[1100]	valid_0's l2: 0.000430582	valid_0's total_error: 0.772262
[1200]	valid_0's l2: 0.000428704	valid_0's total_error: 0.772591
[1300]	valid_0's l2: 0.000431444	valid_0's total_error: 0.778932
[1400]	valid_0's l2: 0.000441034	valid_0's total_error: 0.794183
[1500]	valid_0's l2: 0.000442972	valid_0's total_error: 0.801547
Step TrainModelStep completed in 38.

  df['base_prediction'] = (


Step InverseResidualTargetStep completed in 1.00 seconds
Executing step: InverseScalePredictionsStep
Step InverseScalePredictionsStep completed in 0.01 seconds
Executing step: IntegratePredictionsStep
Step IntegratePredictionsStep completed in 0.02 seconds
Executing step: EvaluatePredictionsSteps
Error en test: 0.4742

Top 5 productos con mayor error absoluto:
   product_id    target  predictions   tn_real   tn_pred  error_absoluto
3       20840  0.709230     2.168350  0.709230  2.168350        1.459120
2       20838  0.955050     2.035248  0.955050  2.035248        1.080198
0       20583  4.225823     5.059568  4.225823  5.059568        0.833745
4       20849  1.134377     1.813997  1.134377  1.813997        0.679620
1       20654  2.935739     3.607938  2.935739  3.607938        0.672199
Step EvaluatePredictionsSteps completed in 0.02 seconds
Executing step: EvaluatePredictionsOptimizatedSteps
alfa optimizado: 0.6665
Error en train: 0.3054
Error en test: 0.4102

Top 5 productos con m

  has_large_values = (abs_vals > 1e6).any()
  has_large_values = (abs_vals > 1e6).any()


[100]	valid_0's l2: 7.57703e-05	valid_0's total_error: 0.185958
[200]	valid_0's l2: 4.0766e-05	valid_0's total_error: 0.138296
[300]	valid_0's l2: 3.86347e-05	valid_0's total_error: 0.198321
[400]	valid_0's l2: 4.35732e-05	valid_0's total_error: 0.266905
[500]	valid_0's l2: 5.09962e-05	valid_0's total_error: 0.335914
[600]	valid_0's l2: 5.69309e-05	valid_0's total_error: 0.358926
[700]	valid_0's l2: 6.38428e-05	valid_0's total_error: 0.391765
[800]	valid_0's l2: 7.0303e-05	valid_0's total_error: 0.43678
[900]	valid_0's l2: 7.72055e-05	valid_0's total_error: 0.456976
[1000]	valid_0's l2: 7.94656e-05	valid_0's total_error: 0.474832
[1100]	valid_0's l2: 8.28876e-05	valid_0's total_error: 0.477565
[1200]	valid_0's l2: 9.13168e-05	valid_0's total_error: 0.51597
[1300]	valid_0's l2: 9.63334e-05	valid_0's total_error: 0.525396
[1400]	valid_0's l2: 9.62475e-05	valid_0's total_error: 0.52696
[1500]	valid_0's l2: 0.000106487	valid_0's total_error: 0.559543
Step TrainModelStep completed in 16.80 

  has_large_values = (abs_vals > 1e6).any()
  has_large_values = (abs_vals > 1e6).any()


[100]	valid_0's l2: 1.69301e-05	valid_0's total_error: 0.714739
[200]	valid_0's l2: 1.79371e-05	valid_0's total_error: 0.848683
[300]	valid_0's l2: 2.02092e-05	valid_0's total_error: 1.13017
[400]	valid_0's l2: 2.21837e-05	valid_0's total_error: 1.41413
[500]	valid_0's l2: 2.27403e-05	valid_0's total_error: 1.50609
[600]	valid_0's l2: 2.34661e-05	valid_0's total_error: 1.57827
[700]	valid_0's l2: 2.3457e-05	valid_0's total_error: 1.58476
[800]	valid_0's l2: 2.32006e-05	valid_0's total_error: 1.36095
[900]	valid_0's l2: 2.3522e-05	valid_0's total_error: 1.40467
[1000]	valid_0's l2: 2.35634e-05	valid_0's total_error: 1.47016
[1100]	valid_0's l2: 2.31835e-05	valid_0's total_error: 1.41049
[1200]	valid_0's l2: 2.27533e-05	valid_0's total_error: 1.21851
[1300]	valid_0's l2: 2.16768e-05	valid_0's total_error: 1.24164
[1400]	valid_0's l2: 2.20438e-05	valid_0's total_error: 1.21941
[1500]	valid_0's l2: 2.23256e-05	valid_0's total_error: 1.18921
Step TrainModelStep completed in 37.73 seconds
Ex

  df['base_prediction'] = (


Step InverseResidualTargetStep completed in 0.95 seconds
Executing step: InverseScalePredictionsStep
Step InverseScalePredictionsStep completed in 0.02 seconds
Executing step: IntegratePredictionsStep
Step IntegratePredictionsStep completed in 0.02 seconds
Executing step: EvaluatePredictionsSteps
Error en test: 0.2968

Top 5 productos con mayor error absoluto:
   product_id    target  predictions   tn_real   tn_pred  error_absoluto
2       21192  0.028426     0.120138  0.028426  0.120138        0.091712
1       21170  0.159473     0.234643  0.159473  0.234643        0.075171
5       21222  0.044435     0.113234  0.044435  0.113234        0.068799
0       20982  0.886092     0.830246  0.886092  0.830246        0.055846
4       21218  0.045135     0.091504  0.045135  0.091504        0.046370
Step EvaluatePredictionsSteps completed in 0.02 seconds
Executing step: EvaluatePredictionsOptimizatedSteps
alfa optimizado: 1.1799
Error en train: 0.2610
Error en test: 0.6265

Top 5 productos con m

  has_large_values = (abs_vals > 1e6).any()
  has_large_values = (abs_vals > 1e6).any()


[100]	valid_0's l2: 2.80042e-05	valid_0's total_error: 1.43781
[200]	valid_0's l2: 3.00391e-05	valid_0's total_error: 0.927549
[300]	valid_0's l2: 3.01536e-05	valid_0's total_error: 0.745932
[400]	valid_0's l2: 3.04583e-05	valid_0's total_error: 0.62965
[500]	valid_0's l2: 2.9786e-05	valid_0's total_error: 0.530372
[600]	valid_0's l2: 2.97012e-05	valid_0's total_error: 0.533107
[700]	valid_0's l2: 2.8798e-05	valid_0's total_error: 0.574761
[800]	valid_0's l2: 2.86716e-05	valid_0's total_error: 0.635169
[900]	valid_0's l2: 2.80652e-05	valid_0's total_error: 0.678964
[1000]	valid_0's l2: 2.78005e-05	valid_0's total_error: 0.693053
[1100]	valid_0's l2: 2.75538e-05	valid_0's total_error: 0.697178
[1200]	valid_0's l2: 2.72761e-05	valid_0's total_error: 0.723372
[1300]	valid_0's l2: 2.7296e-05	valid_0's total_error: 0.740019
[1400]	valid_0's l2: 2.70726e-05	valid_0's total_error: 0.756695
[1500]	valid_0's l2: 2.69495e-05	valid_0's total_error: 0.77552
Step TrainModelStep completed in 23.68 s

  df['base_prediction'] = (


Step InverseResidualTargetStep completed in 0.75 seconds
Executing step: InverseScalePredictionsStep
Step InverseScalePredictionsStep completed in 0.01 seconds
Executing step: IntegratePredictionsStep
Step IntegratePredictionsStep completed in 0.01 seconds
Executing step: EvaluatePredictionsSteps
Error en test: 0.4291

Top 5 productos con mayor error absoluto:
   product_id    target  predictions   tn_real   tn_pred  error_absoluto
5       21245  0.068272     0.223186  0.068272  0.223186        0.154914
0       21142  0.602781     0.512461  0.602781  0.512461        0.090320
4       21233  0.079804     0.158948  0.079804  0.158948        0.079144
1       21191  0.146995     0.215604  0.146995  0.215604        0.068609
3       21226  0.147976     0.213291  0.147976  0.213291        0.065315
Step EvaluatePredictionsSteps completed in 0.02 seconds
Executing step: EvaluatePredictionsOptimizatedSteps
alfa optimizado: 0.8434
Error en train: 0.2872
Error en test: 0.5489

Top 5 productos con m

  has_large_values = (abs_vals > 1e6).any()
  has_large_values = (abs_vals > 1e6).any()


[100]	valid_0's l2: 1.30734e-06	valid_0's total_error: 0.911761
[200]	valid_0's l2: 9.73071e-07	valid_0's total_error: 0.632028
[300]	valid_0's l2: 1.11821e-06	valid_0's total_error: 0.501755
[400]	valid_0's l2: 1.25656e-06	valid_0's total_error: 0.451271
[500]	valid_0's l2: 1.46132e-06	valid_0's total_error: 0.435465
[600]	valid_0's l2: 1.71978e-06	valid_0's total_error: 0.438427
[700]	valid_0's l2: 1.78077e-06	valid_0's total_error: 0.431299
[800]	valid_0's l2: 1.88346e-06	valid_0's total_error: 0.434133
[900]	valid_0's l2: 2.05382e-06	valid_0's total_error: 0.443544
[1000]	valid_0's l2: 2.00365e-06	valid_0's total_error: 0.430082
[1100]	valid_0's l2: 2.03885e-06	valid_0's total_error: 0.41818
[1200]	valid_0's l2: 2.12902e-06	valid_0's total_error: 0.425334
[1300]	valid_0's l2: 2.18616e-06	valid_0's total_error: 0.425328
[1400]	valid_0's l2: 2.19284e-06	valid_0's total_error: 0.423521
[1500]	valid_0's l2: 2.23506e-06	valid_0's total_error: 0.425121
Step TrainModelStep completed in 23

  df['base_prediction'] = (


In [5]:
pipeline_results

{'ARIEL': {'eval_df':     product_id       target  predictions      tn_real      tn_pred  \
  0        20001  1261.322642  1515.850805  1261.322642  1515.850805   
  1        20045   115.114431   183.234329   115.114431   183.234329   
  2        20102    35.854906    49.655575    35.854906    49.655575   
  3        20135   106.473853   108.751898   106.473853   108.751898   
  4        20150    68.611140    88.441539    68.611140    88.441539   
  5        20378    27.132943    24.909370    27.132943    24.909370   
  6        20611    10.865433    16.318514    10.865433    16.318514   
  7        20649    11.354645    75.591061    11.354645    75.591061   
  8        20682     5.673025    14.260145     5.673025    14.260145   
  9        20691    23.896790    56.474383    23.896790    56.474383   
  10       20919     0.970280     4.297042     0.970280     4.297042   
  
      error_absoluto  
  0       254.528162  
  1        68.119897  
  2        13.800669  
  3         2.278045 

In [6]:
final_eval_scaled = pd.DataFrame()
for brand, result in pipeline_results.items():
    alpha_opt = result["alpha_opt"]
    eval_df = result["eval_df"]
    eval_df["tn_pred_scaled"] = eval_df["tn_pred"] * alpha_opt
    final_eval_scaled = pd.concat([final_eval_scaled, eval_df], ignore_index=True)


In [7]:
final_eval_scaled

Unnamed: 0,product_id,target,predictions,tn_real,tn_pred,error_absoluto,tn_pred_scaled
0,20001,1261.322642,1515.850805,1261.322642,1515.850805,254.528162,1276.466155
1,20045,115.114431,183.234329,115.114431,183.234329,68.119897,154.297783
2,20102,35.854906,49.655575,35.854906,49.655575,13.800669,41.813918
3,20135,106.473853,108.751898,106.473853,108.751898,2.278045,91.577691
4,20150,68.611140,88.441539,68.611140,88.441539,19.830400,74.474764
...,...,...,...,...,...,...,...
729,21248,0.021150,-0.006976,0.021150,-0.006976,0.028126,-0.000000
730,21256,0.029625,-0.014238,0.029625,-0.014238,0.043863,-0.000000
731,21259,0.031038,-0.013992,0.031038,-0.013992,0.045030,-0.000000
732,21262,0.022550,-0.012560,0.022550,-0.012560,0.035110,-0.000000


In [8]:
total_error_1 = np.sum(np.abs(final_eval_scaled["tn_pred"] - final_eval_scaled["tn_real"])) / np.sum(final_eval_scaled["tn_real"])
total_error_2 = np.sum(np.abs(final_eval_scaled["tn_pred_scaled"] - final_eval_scaled["tn_real"])) / np.sum(final_eval_scaled["tn_real"])
print(f"Total error without scaling: {total_error_1}")
print(f"Total error with scaling: {total_error_2}")

Total error without scaling: 0.37683290545212056
Total error with scaling: 0.21524080665513978


In [9]:
# entreno pipeline final para kaggle
kaggle_results = {}
for brand, result in pipeline_results.items():
    print(f"Processing brand: {brand}")
    alpha_opt = result["alpha_opt"]
    pipeline = Pipeline(
        steps=[
            LoadDataFrameStep("df_intermedio.parquet"),
            GroupByProductStep(),
            DateRelatedFeaturesStep(),

            FilterDatasetByColumn(column="brand", value=brand),
            #Log1pTranformation(),
            #CreateTargetColumDiffStep(target_col="tn"),
            #CreateTargetColumStep(target_col="tn"),
            CreateResidualTargetStep(target_col="tn", window=12),
            ReduceMemoryUsageStep(),

            #ReduceMemoryUsageStep(),
            FeatureEngineeringLagStep(lags=[1,2,3,5,11,23], columns=["tn", "cust_request_qty", "stock_final"]),
            RollingMeanFeatureStep(window=3, columns=["tn", "cust_request_qty", "stock_final"]),
            RollingMaxFeatureStep(window=3, columns=["tn", "cust_request_qty", "stock_final"]),
            RollingMinFeatureStep(window=3, columns=["tn", "cust_request_qty", "stock_final"]),
            RollingMeanFeatureStep(window=9, columns=["tn", "cust_request_qty", "stock_final"]),
            RollingMaxFeatureStep(window=9, columns=["tn", "cust_request_qty", "stock_final"]),
            RollingMinFeatureStep(window=9, columns=["tn", "cust_request_qty", "stock_final"]),

            ReduceMemoryUsageStep(),

            RollingStdFeatureStep(window=3, columns=["tn", "cust_request_qty"]),
            RollingStdFeatureStep(window=6, columns=["tn", "cust_request_qty"]),
            RollingStdFeatureStep(window=12, columns=["tn", "cust_request_qty"]), 

            RollingSkewFeatureStep(window=3, columns=["tn", "cust_request_qty"]),
            RollingSkewFeatureStep(window=6, columns=["tn", "cust_request_qty"]),
            RollingSkewFeatureStep(window=12, columns=["tn", "cust_request_qty"]),
            ReduceMemoryUsageStep(),

            RollingZscoreFeatureStep(window=3, columns=["tn", "cust_request_qty"]),
            RollingZscoreFeatureStep(window=6, columns=["tn", "cust_request_qty"]),
            RollingZscoreFeatureStep(window=12, columns=["tn", "cust_request_qty"]),
            DiffFeatureStep(periods=1, columns=["tn", "cust_request_qty", "stock_final"]),
            DiffFeatureStep(periods=2, columns=["tn", "cust_request_qty", "stock_final"]),
            DiffFeatureStep(periods=3, columns=["tn", "cust_request_qty", "stock_final"]),
            DiffFeatureStep(periods=4, columns=["tn", "cust_request_qty", "stock_final"]),
            DiffFeatureStep(periods=5, columns=["tn", "cust_request_qty", "stock_final"]),
            DiffFeatureStep(periods=11, columns=["tn", "cust_request_qty", "stock_final"]),
            FeatureEngineeringProductCatInteractionStep(cat="cat1", tn="tn"),
            FeatureEngineeringProductCatInteractionStep(cat="cat2", tn="tn"),
            FeatureEngineeringProductCatInteractionStep(cat="cat3", tn="tn"),
            #FeatureEngineeringProductCatInteractionStep(cat="product_id", tn="tn"),

            CreateTotalCategoryStep(cat="cat1"),
            CreateTotalCategoryStep(cat="cat2"),
            CreateTotalCategoryStep(cat="cat3"),
            CreateTotalCategoryStep(cat="brand"),
            CreateTotalCategoryStep(cat="customer_id"),
            CreateTotalCategoryStep(cat="product_id"),
                    
            CreateTotalCategoryStep(cat="cat1", tn="stock_final"),
            CreateTotalCategoryStep(cat="cat2", tn="stock_final"),
            CreateTotalCategoryStep(cat="cat3", tn="stock_final"),

            #ReduceMemoryUsageStep(),
            FeatureDivInteractionStep(columns=[
                    ("tn", "tn_cat1_vendidas"), 
                    ("tn", "tn_cat2_vendidas"), 
                    ("tn", "tn_cat3_vendidas"), 
                    ("tn", "tn_brand_vendidas")]
            ),
            #ReduceMemoryUsageStep(),

            FeatureProdInteractionStep(columns=[("tn", "cust_request_qty")]),
            CreateWeightByCustomerStep(),
            CreateWeightByProductStep(),
            ReduceMemoryUsageStep(),
            DeleteBadColumns(),
            FilterProductsIDStep(dfs=["df"]),
            SplitDataFrameStep2(df="df", test_date=35, gap=1),
            PrepareXYStep(),
            TrainModelStep(params={"num_leaves":31, "feature_fraction":0.2}),
            PredictStep(),
            InverseResidualTargetStep(),
            #InverseLog1pTranformation(),
            InverseScalePredictionsStep(),
            IntegratePredictionsStep(),
            #PlotFeatureImportanceStep(),
            KaggleSubmissionStep(),
            KaggleSubmissionStep(alpha_opt=alpha_opt, experiment="alpha"),

        ],
        optimize_arftifacts_memory=True
    )

    try: 
        pipeline.run()
        
        # Save the results
        kaggle_results[brand] = {
            "submission": pipeline.get_artifact("submission"),
            "submission_alpha": pipeline.get_artifact("submissionalpha"),
        }
    except Exception as e:
        print(f"Error processing brand {brand}: {e}")
        continue

Processing brand: ARIEL
Executing step: LoadDataFrameStep
Step LoadDataFrameStep completed in 2.12 seconds
Executing step: GroupByProductStep
Step GroupByProductStep completed in 1.46 seconds
Executing step: DateRelatedFeaturesStep
Step DateRelatedFeaturesStep completed in 0.04 seconds
Executing step: FilterDatasetByColumn
(761, 16)
Step FilterDatasetByColumn completed in 0.13 seconds
Executing step: CreateResidualTargetStep
Step CreateResidualTargetStep completed in 0.01 seconds
Executing step: ReduceMemoryUsageStep
--- Memory usage before: 0.06 MB
--- Memory usage after: 0.03 MB
--- Decreased memory usage by 52.0%

Step ReduceMemoryUsageStep completed in 0.01 seconds
Executing step: FeatureEngineeringLagStep
Step FeatureEngineeringLagStep completed in 0.01 seconds
Executing step: RollingMeanFeatureStep
Step RollingMeanFeatureStep completed in 0.02 seconds
Executing step: RollingMaxFeatureStep
Step RollingMaxFeatureStep completed in 0.02 seconds
Executing step: RollingMinFeatureStep
S

  df['base_prediction'] = (


Step LoadDataFrameStep completed in 2.00 seconds
Executing step: GroupByProductStep
Step GroupByProductStep completed in 1.45 seconds
Executing step: DateRelatedFeaturesStep
Step DateRelatedFeaturesStep completed in 0.04 seconds
Executing step: FilterDatasetByColumn
(1843, 16)
Step FilterDatasetByColumn completed in 0.01 seconds
Executing step: CreateResidualTargetStep
Step CreateResidualTargetStep completed in 0.02 seconds
Executing step: ReduceMemoryUsageStep
--- Memory usage before: 0.15 MB
--- Memory usage after: 0.07 MB
--- Decreased memory usage by 52.5%

Step ReduceMemoryUsageStep completed in 0.01 seconds
Executing step: FeatureEngineeringLagStep
Step FeatureEngineeringLagStep completed in 0.01 seconds
Executing step: RollingMeanFeatureStep
Step RollingMeanFeatureStep completed in 0.03 seconds
Executing step: RollingMaxFeatureStep
Step RollingMaxFeatureStep completed in 0.03 seconds
Executing step: RollingMinFeatureStep
Step RollingMinFeatureStep completed in 0.03 seconds
Execu

  df['base_prediction'] = (


Step InverseResidualTargetStep completed in 0.03 seconds
Executing step: InverseScalePredictionsStep
Step InverseScalePredictionsStep completed in 0.00 seconds
Executing step: IntegratePredictionsStep
Step IntegratePredictionsStep completed in 0.00 seconds
Executing step: KaggleSubmissionStep
Step KaggleSubmissionStep completed in 0.01 seconds
Executing step: KaggleSubmissionStep
Step KaggleSubmissionStep completed in 0.00 seconds
Processing brand: NATURA
Executing step: LoadDataFrameStep
Step LoadDataFrameStep completed in 2.03 seconds
Executing step: GroupByProductStep
Step GroupByProductStep completed in 1.48 seconds
Executing step: DateRelatedFeaturesStep
Step DateRelatedFeaturesStep completed in 0.04 seconds
Executing step: FilterDatasetByColumn
(1281, 16)
Step FilterDatasetByColumn completed in 0.01 seconds
Executing step: CreateResidualTargetStep
Step CreateResidualTargetStep completed in 0.01 seconds
Executing step: ReduceMemoryUsageStep
--- Memory usage before: 0.10 MB
--- Mem

  df['base_prediction'] = (


Step LoadDataFrameStep completed in 1.93 seconds
Executing step: GroupByProductStep
Step GroupByProductStep completed in 1.52 seconds
Executing step: DateRelatedFeaturesStep
Step DateRelatedFeaturesStep completed in 0.04 seconds
Executing step: FilterDatasetByColumn
(549, 16)
Step FilterDatasetByColumn completed in 0.01 seconds
Executing step: CreateResidualTargetStep
Step CreateResidualTargetStep completed in 0.01 seconds
Executing step: ReduceMemoryUsageStep
--- Memory usage before: 0.05 MB
--- Memory usage after: 0.02 MB
--- Decreased memory usage by 51.6%

Step ReduceMemoryUsageStep completed in 0.01 seconds
Executing step: FeatureEngineeringLagStep
Step FeatureEngineeringLagStep completed in 0.01 seconds
Executing step: RollingMeanFeatureStep
Step RollingMeanFeatureStep completed in 0.01 seconds
Executing step: RollingMaxFeatureStep
Step RollingMaxFeatureStep completed in 0.01 seconds
Executing step: RollingMinFeatureStep
Step RollingMinFeatureStep completed in 0.01 seconds
Execut

  df['base_prediction'] = (


Step LoadDataFrameStep completed in 1.99 seconds
Executing step: GroupByProductStep
Step GroupByProductStep completed in 1.48 seconds
Executing step: DateRelatedFeaturesStep
Step DateRelatedFeaturesStep completed in 0.04 seconds
Executing step: FilterDatasetByColumn
(658, 16)
Step FilterDatasetByColumn completed in 0.01 seconds
Executing step: CreateResidualTargetStep
Step CreateResidualTargetStep completed in 0.01 seconds
Executing step: ReduceMemoryUsageStep
--- Memory usage before: 0.05 MB
--- Memory usage after: 0.03 MB
--- Decreased memory usage by 51.8%

Step ReduceMemoryUsageStep completed in 0.01 seconds
Executing step: FeatureEngineeringLagStep
Step FeatureEngineeringLagStep completed in 0.01 seconds
Executing step: RollingMeanFeatureStep
Step RollingMeanFeatureStep completed in 0.02 seconds
Executing step: RollingMaxFeatureStep
Step RollingMaxFeatureStep completed in 0.02 seconds
Executing step: RollingMinFeatureStep
Step RollingMinFeatureStep completed in 0.01 seconds
Execut

  df['base_prediction'] = (


Step LoadDataFrameStep completed in 2.00 seconds
Executing step: GroupByProductStep
Step GroupByProductStep completed in 1.44 seconds
Executing step: DateRelatedFeaturesStep
Step DateRelatedFeaturesStep completed in 0.04 seconds
Executing step: FilterDatasetByColumn
(2493, 16)
Step FilterDatasetByColumn completed in 0.01 seconds
Executing step: CreateResidualTargetStep
Step CreateResidualTargetStep completed in 0.02 seconds
Executing step: ReduceMemoryUsageStep
--- Memory usage before: 0.20 MB
--- Memory usage after: 0.10 MB
--- Decreased memory usage by 52.6%

Step ReduceMemoryUsageStep completed in 0.01 seconds
Executing step: FeatureEngineeringLagStep
Step FeatureEngineeringLagStep completed in 0.01 seconds
Executing step: RollingMeanFeatureStep
Step RollingMeanFeatureStep completed in 0.04 seconds
Executing step: RollingMaxFeatureStep
Step RollingMaxFeatureStep completed in 0.04 seconds
Executing step: RollingMinFeatureStep
Step RollingMinFeatureStep completed in 0.04 seconds
Execu

  df['base_prediction'] = (


Step KaggleSubmissionStep completed in 0.16 seconds
Executing step: KaggleSubmissionStep
Step KaggleSubmissionStep completed in 0.01 seconds
Processing brand: MOSTAZA1
Executing step: LoadDataFrameStep
Step LoadDataFrameStep completed in 2.05 seconds
Executing step: GroupByProductStep
Step GroupByProductStep completed in 1.44 seconds
Executing step: DateRelatedFeaturesStep
Step DateRelatedFeaturesStep completed in 0.04 seconds
Executing step: FilterDatasetByColumn
(285, 16)
Step FilterDatasetByColumn completed in 0.00 seconds
Executing step: CreateResidualTargetStep
Step CreateResidualTargetStep completed in 0.01 seconds
Executing step: ReduceMemoryUsageStep
--- Memory usage before: 0.02 MB
--- Memory usage after: 0.01 MB
--- Decreased memory usage by 50.5%

Step ReduceMemoryUsageStep completed in 0.01 seconds
Executing step: FeatureEngineeringLagStep
Step FeatureEngineeringLagStep completed in 0.01 seconds
Executing step: RollingMeanFeatureStep
Step RollingMeanFeatureStep completed in

  df['base_prediction'] = (


Step LoadDataFrameStep completed in 1.91 seconds
Executing step: GroupByProductStep
Step GroupByProductStep completed in 1.44 seconds
Executing step: DateRelatedFeaturesStep
Step DateRelatedFeaturesStep completed in 0.04 seconds
Executing step: FilterDatasetByColumn
(127, 16)
Step FilterDatasetByColumn completed in 0.01 seconds
Executing step: CreateResidualTargetStep
Step CreateResidualTargetStep completed in 0.01 seconds
Executing step: ReduceMemoryUsageStep
--- Memory usage before: 0.01 MB
--- Memory usage after: 0.01 MB
--- Decreased memory usage by 47.7%

Step ReduceMemoryUsageStep completed in 0.01 seconds
Executing step: FeatureEngineeringLagStep
Step FeatureEngineeringLagStep completed in 0.01 seconds
Executing step: RollingMeanFeatureStep
Step RollingMeanFeatureStep completed in 0.01 seconds
Executing step: RollingMaxFeatureStep
Step RollingMaxFeatureStep completed in 0.01 seconds
Executing step: RollingMinFeatureStep
Step RollingMinFeatureStep completed in 0.01 seconds
Execut

  df['base_prediction'] = (


Step LoadDataFrameStep completed in 1.94 seconds
Executing step: GroupByProductStep
Step GroupByProductStep completed in 1.45 seconds
Executing step: DateRelatedFeaturesStep
Step DateRelatedFeaturesStep completed in 0.04 seconds
Executing step: FilterDatasetByColumn
(3553, 16)
Step FilterDatasetByColumn completed in 0.01 seconds
Executing step: CreateResidualTargetStep
Step CreateResidualTargetStep completed in 0.02 seconds
Executing step: ReduceMemoryUsageStep
--- Memory usage before: 0.29 MB
--- Memory usage after: 0.14 MB
--- Decreased memory usage by 52.7%

Step ReduceMemoryUsageStep completed in 0.01 seconds
Executing step: FeatureEngineeringLagStep
Step FeatureEngineeringLagStep completed in 0.01 seconds
Executing step: RollingMeanFeatureStep
Step RollingMeanFeatureStep completed in 0.05 seconds
Executing step: RollingMaxFeatureStep
Step RollingMaxFeatureStep completed in 0.05 seconds
Executing step: RollingMinFeatureStep
Step RollingMinFeatureStep completed in 0.14 seconds
Execu

  df['base_prediction'] = (


Step LoadDataFrameStep completed in 1.90 seconds
Executing step: GroupByProductStep
Step GroupByProductStep completed in 1.50 seconds
Executing step: DateRelatedFeaturesStep
Step DateRelatedFeaturesStep completed in 0.04 seconds
Executing step: FilterDatasetByColumn
(1191, 16)
Step FilterDatasetByColumn completed in 0.01 seconds
Executing step: CreateResidualTargetStep
Step CreateResidualTargetStep completed in 0.02 seconds
Executing step: ReduceMemoryUsageStep
--- Memory usage before: 0.10 MB
--- Memory usage after: 0.05 MB
--- Decreased memory usage by 52.3%

Step ReduceMemoryUsageStep completed in 0.01 seconds
Executing step: FeatureEngineeringLagStep
Step FeatureEngineeringLagStep completed in 0.01 seconds
Executing step: RollingMeanFeatureStep
Step RollingMeanFeatureStep completed in 0.03 seconds
Executing step: RollingMaxFeatureStep
Step RollingMaxFeatureStep completed in 0.02 seconds
Executing step: RollingMinFeatureStep
Step RollingMinFeatureStep completed in 0.03 seconds
Execu

  df['base_prediction'] = (


Step LoadDataFrameStep completed in 1.98 seconds
Executing step: GroupByProductStep
Step GroupByProductStep completed in 1.42 seconds
Executing step: DateRelatedFeaturesStep
Step DateRelatedFeaturesStep completed in 0.04 seconds
Executing step: FilterDatasetByColumn
(510, 16)
Step FilterDatasetByColumn completed in 0.01 seconds
Executing step: CreateResidualTargetStep
Step CreateResidualTargetStep completed in 0.01 seconds
Executing step: ReduceMemoryUsageStep
--- Memory usage before: 0.04 MB
--- Memory usage after: 0.02 MB
--- Decreased memory usage by 51.5%

Step ReduceMemoryUsageStep completed in 0.00 seconds
Executing step: FeatureEngineeringLagStep
Step FeatureEngineeringLagStep completed in 0.01 seconds
Executing step: RollingMeanFeatureStep
Step RollingMeanFeatureStep completed in 0.01 seconds
Executing step: RollingMaxFeatureStep
Step RollingMaxFeatureStep completed in 0.01 seconds
Executing step: RollingMinFeatureStep
Step RollingMinFeatureStep completed in 0.01 seconds
Execut

  df['base_prediction'] = (


Step LoadDataFrameStep completed in 1.94 seconds
Executing step: GroupByProductStep
Step GroupByProductStep completed in 1.51 seconds
Executing step: DateRelatedFeaturesStep
Step DateRelatedFeaturesStep completed in 0.04 seconds
Executing step: FilterDatasetByColumn
(4073, 16)
Step FilterDatasetByColumn completed in 0.01 seconds
Executing step: CreateResidualTargetStep
Step CreateResidualTargetStep completed in 0.03 seconds
Executing step: ReduceMemoryUsageStep
--- Memory usage before: 0.33 MB
--- Memory usage after: 0.16 MB
--- Decreased memory usage by 52.8%

Step ReduceMemoryUsageStep completed in 0.01 seconds
Executing step: FeatureEngineeringLagStep
Step FeatureEngineeringLagStep completed in 0.01 seconds
Executing step: RollingMeanFeatureStep
Step RollingMeanFeatureStep completed in 0.08 seconds
Executing step: RollingMaxFeatureStep
Step RollingMaxFeatureStep completed in 0.08 seconds
Executing step: RollingMinFeatureStep
Step RollingMinFeatureStep completed in 0.08 seconds
Execu

  df['base_prediction'] = (


Step LoadDataFrameStep completed in 2.04 seconds
Executing step: GroupByProductStep
Step GroupByProductStep completed in 1.47 seconds
Executing step: DateRelatedFeaturesStep
Step DateRelatedFeaturesStep completed in 0.04 seconds
Executing step: FilterDatasetByColumn
(422, 16)
Step FilterDatasetByColumn completed in 0.01 seconds
Executing step: CreateResidualTargetStep
Step CreateResidualTargetStep completed in 0.01 seconds
Executing step: ReduceMemoryUsageStep
--- Memory usage before: 0.04 MB
--- Memory usage after: 0.02 MB
--- Decreased memory usage by 51.2%

Step ReduceMemoryUsageStep completed in 0.01 seconds
Executing step: FeatureEngineeringLagStep
Step FeatureEngineeringLagStep completed in 0.01 seconds
Executing step: RollingMeanFeatureStep
Step RollingMeanFeatureStep completed in 0.01 seconds
Executing step: RollingMaxFeatureStep
Step RollingMaxFeatureStep completed in 0.01 seconds
Executing step: RollingMinFeatureStep
Step RollingMinFeatureStep completed in 0.01 seconds
Execut

  df['base_prediction'] = (


Step LoadDataFrameStep completed in 1.95 seconds
Executing step: GroupByProductStep
Step GroupByProductStep completed in 1.51 seconds
Executing step: DateRelatedFeaturesStep
Step DateRelatedFeaturesStep completed in 0.04 seconds
Executing step: FilterDatasetByColumn
(2473, 16)
Step FilterDatasetByColumn completed in 0.04 seconds
Executing step: CreateResidualTargetStep
Step CreateResidualTargetStep completed in 0.02 seconds
Executing step: ReduceMemoryUsageStep
--- Memory usage before: 0.20 MB
--- Memory usage after: 0.10 MB
--- Decreased memory usage by 52.6%

Step ReduceMemoryUsageStep completed in 0.01 seconds
Executing step: FeatureEngineeringLagStep
Step FeatureEngineeringLagStep completed in 0.01 seconds
Executing step: RollingMeanFeatureStep
Step RollingMeanFeatureStep completed in 0.05 seconds
Executing step: RollingMaxFeatureStep
Step RollingMaxFeatureStep completed in 0.04 seconds
Executing step: RollingMinFeatureStep
Step RollingMinFeatureStep completed in 0.05 seconds
Execu

  df['base_prediction'] = (


Step LoadDataFrameStep completed in 1.90 seconds
Executing step: GroupByProductStep
Step GroupByProductStep completed in 1.44 seconds
Executing step: DateRelatedFeaturesStep
Step DateRelatedFeaturesStep completed in 0.04 seconds
Executing step: FilterDatasetByColumn
(252, 16)
Step FilterDatasetByColumn completed in 0.01 seconds
Executing step: CreateResidualTargetStep
Step CreateResidualTargetStep completed in 0.01 seconds
Executing step: ReduceMemoryUsageStep
--- Memory usage before: 0.02 MB
--- Memory usage after: 0.01 MB
--- Decreased memory usage by 50.2%

Step ReduceMemoryUsageStep completed in 0.00 seconds
Executing step: FeatureEngineeringLagStep
Step FeatureEngineeringLagStep completed in 0.01 seconds
Executing step: RollingMeanFeatureStep
Step RollingMeanFeatureStep completed in 0.01 seconds
Executing step: RollingMaxFeatureStep
Step RollingMaxFeatureStep completed in 0.01 seconds
Executing step: RollingMinFeatureStep
Step RollingMinFeatureStep completed in 0.01 seconds
Execut

  df['base_prediction'] = (


Step LoadDataFrameStep completed in 1.94 seconds
Executing step: GroupByProductStep
Step GroupByProductStep completed in 1.48 seconds
Executing step: DateRelatedFeaturesStep
Step DateRelatedFeaturesStep completed in 0.04 seconds
Executing step: FilterDatasetByColumn
(279, 16)
Step FilterDatasetByColumn completed in 0.01 seconds
Executing step: CreateResidualTargetStep
Step CreateResidualTargetStep completed in 0.01 seconds
Executing step: ReduceMemoryUsageStep
--- Memory usage before: 0.02 MB
--- Memory usage after: 0.01 MB
--- Decreased memory usage by 50.4%

Step ReduceMemoryUsageStep completed in 0.01 seconds
Executing step: FeatureEngineeringLagStep
Step FeatureEngineeringLagStep completed in 0.01 seconds
Executing step: RollingMeanFeatureStep
Step RollingMeanFeatureStep completed in 0.01 seconds
Executing step: RollingMaxFeatureStep
Step RollingMaxFeatureStep completed in 0.01 seconds
Executing step: RollingMinFeatureStep
Step RollingMinFeatureStep completed in 0.01 seconds
Execut

  df['base_prediction'] = (


Step LoadDataFrameStep completed in 1.91 seconds
Executing step: GroupByProductStep
Step GroupByProductStep completed in 1.53 seconds
Executing step: DateRelatedFeaturesStep
Step DateRelatedFeaturesStep completed in 0.04 seconds
Executing step: FilterDatasetByColumn
(229, 16)
Step FilterDatasetByColumn completed in 0.00 seconds
Executing step: CreateResidualTargetStep
Step CreateResidualTargetStep completed in 0.01 seconds
Executing step: ReduceMemoryUsageStep
--- Memory usage before: 0.02 MB
--- Memory usage after: 0.01 MB
--- Decreased memory usage by 49.9%

Step ReduceMemoryUsageStep completed in 0.01 seconds
Executing step: FeatureEngineeringLagStep
Step FeatureEngineeringLagStep completed in 0.01 seconds
Executing step: RollingMeanFeatureStep
Step RollingMeanFeatureStep completed in 0.01 seconds
Executing step: RollingMaxFeatureStep
Step RollingMaxFeatureStep completed in 0.01 seconds
Executing step: RollingMinFeatureStep
Step RollingMinFeatureStep completed in 0.01 seconds
Execut

  df['base_prediction'] = (


Step LoadDataFrameStep completed in 1.93 seconds
Executing step: GroupByProductStep
Step GroupByProductStep completed in 1.46 seconds
Executing step: DateRelatedFeaturesStep
Step DateRelatedFeaturesStep completed in 0.04 seconds
Executing step: FilterDatasetByColumn
(3049, 16)
Step FilterDatasetByColumn completed in 0.01 seconds
Executing step: CreateResidualTargetStep
Step CreateResidualTargetStep completed in 0.02 seconds
Executing step: ReduceMemoryUsageStep
--- Memory usage before: 0.25 MB
--- Memory usage after: 0.12 MB
--- Decreased memory usage by 52.7%

Step ReduceMemoryUsageStep completed in 0.01 seconds
Executing step: FeatureEngineeringLagStep
Step FeatureEngineeringLagStep completed in 0.01 seconds
Executing step: RollingMeanFeatureStep
Step RollingMeanFeatureStep completed in 0.06 seconds
Executing step: RollingMaxFeatureStep
Step RollingMaxFeatureStep completed in 0.06 seconds
Executing step: RollingMinFeatureStep
Step RollingMinFeatureStep completed in 0.06 seconds
Execu

  df['base_prediction'] = (


Step LoadDataFrameStep completed in 1.91 seconds
Executing step: GroupByProductStep
Step GroupByProductStep completed in 1.52 seconds
Executing step: DateRelatedFeaturesStep
Step DateRelatedFeaturesStep completed in 0.04 seconds
Executing step: FilterDatasetByColumn
(92, 16)
Step FilterDatasetByColumn completed in 0.01 seconds
Executing step: CreateResidualTargetStep
Step CreateResidualTargetStep completed in 0.01 seconds
Executing step: ReduceMemoryUsageStep
--- Memory usage before: 0.01 MB
--- Memory usage after: 0.00 MB
--- Decreased memory usage by 45.9%

Step ReduceMemoryUsageStep completed in 0.00 seconds
Executing step: FeatureEngineeringLagStep
Step FeatureEngineeringLagStep completed in 0.01 seconds
Executing step: RollingMeanFeatureStep
Step RollingMeanFeatureStep completed in 0.01 seconds
Executing step: RollingMaxFeatureStep
Step RollingMaxFeatureStep completed in 0.01 seconds
Executing step: RollingMinFeatureStep
Step RollingMinFeatureStep completed in 0.01 seconds
Executi

  df['base_prediction'] = (


Step LoadDataFrameStep completed in 1.94 seconds
Executing step: GroupByProductStep
Step GroupByProductStep completed in 1.56 seconds
Executing step: DateRelatedFeaturesStep
Step DateRelatedFeaturesStep completed in 0.04 seconds
Executing step: FilterDatasetByColumn
(180, 16)
Step FilterDatasetByColumn completed in 0.01 seconds
Executing step: CreateResidualTargetStep
Step CreateResidualTargetStep completed in 0.00 seconds
Executing step: ReduceMemoryUsageStep
--- Memory usage before: 0.02 MB
--- Memory usage after: 0.01 MB
--- Decreased memory usage by 49.1%

Step ReduceMemoryUsageStep completed in 0.06 seconds
Executing step: FeatureEngineeringLagStep
Step FeatureEngineeringLagStep completed in 0.02 seconds
Executing step: RollingMeanFeatureStep
Step RollingMeanFeatureStep completed in 0.01 seconds
Executing step: RollingMaxFeatureStep
Step RollingMaxFeatureStep completed in 0.01 seconds
Executing step: RollingMinFeatureStep
Step RollingMinFeatureStep completed in 0.01 seconds
Execut

  df['base_prediction'] = (


Step LoadDataFrameStep completed in 1.95 seconds
Executing step: GroupByProductStep
Step GroupByProductStep completed in 1.47 seconds
Executing step: DateRelatedFeaturesStep
Step DateRelatedFeaturesStep completed in 0.04 seconds
Executing step: FilterDatasetByColumn
(842, 16)
Step FilterDatasetByColumn completed in 0.01 seconds
Executing step: CreateResidualTargetStep
Step CreateResidualTargetStep completed in 0.01 seconds
Executing step: ReduceMemoryUsageStep
--- Memory usage before: 0.07 MB
--- Memory usage after: 0.03 MB
--- Decreased memory usage by 52.1%

Step ReduceMemoryUsageStep completed in 0.01 seconds
Executing step: FeatureEngineeringLagStep
Step FeatureEngineeringLagStep completed in 0.01 seconds
Executing step: RollingMeanFeatureStep
Step RollingMeanFeatureStep completed in 0.02 seconds
Executing step: RollingMaxFeatureStep
Step RollingMaxFeatureStep completed in 0.02 seconds
Executing step: RollingMinFeatureStep
Step RollingMinFeatureStep completed in 0.02 seconds
Execut

  df['base_prediction'] = (


Step LoadDataFrameStep completed in 1.93 seconds
Executing step: GroupByProductStep
Step GroupByProductStep completed in 1.54 seconds
Executing step: DateRelatedFeaturesStep
Step DateRelatedFeaturesStep completed in 0.04 seconds
Executing step: FilterDatasetByColumn
(1442, 16)
Step FilterDatasetByColumn completed in 0.01 seconds
Executing step: CreateResidualTargetStep
Step CreateResidualTargetStep completed in 0.01 seconds
Executing step: ReduceMemoryUsageStep
--- Memory usage before: 0.12 MB
--- Memory usage after: 0.06 MB
--- Decreased memory usage by 52.4%

Step ReduceMemoryUsageStep completed in 0.01 seconds
Executing step: FeatureEngineeringLagStep
Step FeatureEngineeringLagStep completed in 0.01 seconds
Executing step: RollingMeanFeatureStep
Step RollingMeanFeatureStep completed in 0.03 seconds
Executing step: RollingMaxFeatureStep
Step RollingMaxFeatureStep completed in 0.02 seconds
Executing step: RollingMinFeatureStep
Step RollingMinFeatureStep completed in 0.03 seconds
Execu

  df['base_prediction'] = (


Step LoadDataFrameStep completed in 1.93 seconds
Executing step: GroupByProductStep
Step GroupByProductStep completed in 1.50 seconds
Executing step: DateRelatedFeaturesStep
Step DateRelatedFeaturesStep completed in 0.04 seconds
Executing step: FilterDatasetByColumn
(358, 16)
Step FilterDatasetByColumn completed in 0.01 seconds
Executing step: CreateResidualTargetStep
Step CreateResidualTargetStep completed in 0.01 seconds
Executing step: ReduceMemoryUsageStep
--- Memory usage before: 0.03 MB
--- Memory usage after: 0.01 MB
--- Decreased memory usage by 50.9%

Step ReduceMemoryUsageStep completed in 0.01 seconds
Executing step: FeatureEngineeringLagStep
Step FeatureEngineeringLagStep completed in 0.01 seconds
Executing step: RollingMeanFeatureStep
Step RollingMeanFeatureStep completed in 0.01 seconds
Executing step: RollingMaxFeatureStep
Step RollingMaxFeatureStep completed in 0.01 seconds
Executing step: RollingMinFeatureStep
Step RollingMinFeatureStep completed in 0.01 seconds
Execut

  df['base_prediction'] = (


Step LoadDataFrameStep completed in 1.90 seconds
Executing step: GroupByProductStep
Step GroupByProductStep completed in 1.53 seconds
Executing step: DateRelatedFeaturesStep
Step DateRelatedFeaturesStep completed in 0.05 seconds
Executing step: FilterDatasetByColumn
(1132, 16)
Step FilterDatasetByColumn completed in 0.01 seconds
Executing step: CreateResidualTargetStep
Step CreateResidualTargetStep completed in 0.01 seconds
Executing step: ReduceMemoryUsageStep
--- Memory usage before: 0.09 MB
--- Memory usage after: 0.04 MB
--- Decreased memory usage by 52.3%

Step ReduceMemoryUsageStep completed in 0.01 seconds
Executing step: FeatureEngineeringLagStep
Step FeatureEngineeringLagStep completed in 0.01 seconds
Executing step: RollingMeanFeatureStep
Step RollingMeanFeatureStep completed in 0.02 seconds
Executing step: RollingMaxFeatureStep
Step RollingMaxFeatureStep completed in 0.02 seconds
Executing step: RollingMinFeatureStep
Step RollingMinFeatureStep completed in 0.02 seconds
Execu

  df['base_prediction'] = (


Step LoadDataFrameStep completed in 1.92 seconds
Executing step: GroupByProductStep
Step GroupByProductStep completed in 1.55 seconds
Executing step: DateRelatedFeaturesStep
Step DateRelatedFeaturesStep completed in 0.04 seconds
Executing step: FilterDatasetByColumn
(432, 16)
Step FilterDatasetByColumn completed in 0.00 seconds
Executing step: CreateResidualTargetStep
Step CreateResidualTargetStep completed in 0.01 seconds
Executing step: ReduceMemoryUsageStep
--- Memory usage before: 0.04 MB
--- Memory usage after: 0.02 MB
--- Decreased memory usage by 51.3%

Step ReduceMemoryUsageStep completed in 0.01 seconds
Executing step: FeatureEngineeringLagStep
Step FeatureEngineeringLagStep completed in 0.01 seconds
Executing step: RollingMeanFeatureStep
Step RollingMeanFeatureStep completed in 0.01 seconds
Executing step: RollingMaxFeatureStep
Step RollingMaxFeatureStep completed in 0.01 seconds
Executing step: RollingMinFeatureStep
Step RollingMinFeatureStep completed in 0.01 seconds
Execut

  df['base_prediction'] = (


Step InverseResidualTargetStep completed in 0.03 seconds
Executing step: InverseScalePredictionsStep
Step InverseScalePredictionsStep completed in 0.00 seconds
Executing step: IntegratePredictionsStep
Step IntegratePredictionsStep completed in 0.00 seconds
Executing step: KaggleSubmissionStep
Step KaggleSubmissionStep completed in 0.00 seconds
Executing step: KaggleSubmissionStep
Step KaggleSubmissionStep completed in 0.00 seconds
Processing brand: ESPADOL
Executing step: LoadDataFrameStep
Step LoadDataFrameStep completed in 1.96 seconds
Executing step: GroupByProductStep
Step GroupByProductStep completed in 1.46 seconds
Executing step: DateRelatedFeaturesStep
Step DateRelatedFeaturesStep completed in 0.04 seconds
Executing step: FilterDatasetByColumn
(410, 16)
Step FilterDatasetByColumn completed in 0.05 seconds
Executing step: CreateResidualTargetStep
Step CreateResidualTargetStep completed in 0.01 seconds
Executing step: ReduceMemoryUsageStep
--- Memory usage before: 0.03 MB
--- Mem

  df['base_prediction'] = (


Step LoadDataFrameStep completed in 11.88 seconds
Executing step: GroupByProductStep
Step GroupByProductStep completed in 1.47 seconds
Executing step: DateRelatedFeaturesStep
Step DateRelatedFeaturesStep completed in 0.04 seconds
Executing step: FilterDatasetByColumn
(453, 16)
Step FilterDatasetByColumn completed in 0.04 seconds
Executing step: CreateResidualTargetStep
Step CreateResidualTargetStep completed in 0.01 seconds
Executing step: ReduceMemoryUsageStep
--- Memory usage before: 0.04 MB
--- Memory usage after: 0.02 MB
--- Decreased memory usage by 51.4%

Step ReduceMemoryUsageStep completed in 0.02 seconds
Executing step: FeatureEngineeringLagStep
Step FeatureEngineeringLagStep completed in 0.01 seconds
Executing step: RollingMeanFeatureStep
Step RollingMeanFeatureStep completed in 0.01 seconds
Executing step: RollingMaxFeatureStep
Step RollingMaxFeatureStep completed in 0.01 seconds
Executing step: RollingMinFeatureStep
Step RollingMinFeatureStep completed in 0.01 seconds
Execu

  df['base_prediction'] = (


Step LoadDataFrameStep completed in 1.92 seconds
Executing step: GroupByProductStep
Step GroupByProductStep completed in 1.53 seconds
Executing step: DateRelatedFeaturesStep
Step DateRelatedFeaturesStep completed in 0.04 seconds
Executing step: FilterDatasetByColumn
(58, 16)
Step FilterDatasetByColumn completed in 0.01 seconds
Executing step: CreateResidualTargetStep
Step CreateResidualTargetStep completed in 0.01 seconds
Executing step: ReduceMemoryUsageStep
--- Memory usage before: 0.01 MB
--- Memory usage after: 0.00 MB
--- Decreased memory usage by 42.6%

Step ReduceMemoryUsageStep completed in 0.00 seconds
Executing step: FeatureEngineeringLagStep
Step FeatureEngineeringLagStep completed in 0.01 seconds
Executing step: RollingMeanFeatureStep
Step RollingMeanFeatureStep completed in 0.01 seconds
Executing step: RollingMaxFeatureStep
Step RollingMaxFeatureStep completed in 0.01 seconds
Executing step: RollingMinFeatureStep
Step RollingMinFeatureStep completed in 0.01 seconds
Executi

  df['base_prediction'] = (


Step LoadDataFrameStep completed in 1.91 seconds
Executing step: GroupByProductStep
Step GroupByProductStep completed in 1.53 seconds
Executing step: DateRelatedFeaturesStep
Step DateRelatedFeaturesStep completed in 0.04 seconds
Executing step: FilterDatasetByColumn
(112, 16)
Step FilterDatasetByColumn completed in 0.01 seconds
Executing step: CreateResidualTargetStep
Step CreateResidualTargetStep completed in 0.01 seconds
Executing step: ReduceMemoryUsageStep
--- Memory usage before: 0.01 MB
--- Memory usage after: 0.01 MB
--- Decreased memory usage by 48.1%

Step ReduceMemoryUsageStep completed in 0.01 seconds
Executing step: FeatureEngineeringLagStep
Step FeatureEngineeringLagStep completed in 0.01 seconds
Executing step: RollingMeanFeatureStep
Step RollingMeanFeatureStep completed in 0.01 seconds
Executing step: RollingMaxFeatureStep
Step RollingMaxFeatureStep completed in 0.01 seconds
Executing step: RollingMinFeatureStep
Step RollingMinFeatureStep completed in 0.01 seconds
Execut

  df['base_prediction'] = (


Step InverseResidualTargetStep completed in 0.02 seconds
Executing step: InverseScalePredictionsStep
Step InverseScalePredictionsStep completed in 0.00 seconds
Executing step: IntegratePredictionsStep
Step IntegratePredictionsStep completed in 0.00 seconds
Executing step: KaggleSubmissionStep
Step KaggleSubmissionStep completed in 0.00 seconds
Executing step: KaggleSubmissionStep
Step KaggleSubmissionStep completed in 0.00 seconds
Processing brand: VICHY
Executing step: LoadDataFrameStep
Step LoadDataFrameStep completed in 1.91 seconds
Executing step: GroupByProductStep
Step GroupByProductStep completed in 1.54 seconds
Executing step: DateRelatedFeaturesStep
Step DateRelatedFeaturesStep completed in 0.04 seconds
Executing step: FilterDatasetByColumn
(119, 16)
Step FilterDatasetByColumn completed in 0.01 seconds
Executing step: CreateResidualTargetStep
Step CreateResidualTargetStep completed in 0.01 seconds
Executing step: ReduceMemoryUsageStep
--- Memory usage before: 0.01 MB
--- Memor

  df['base_prediction'] = (


In [10]:
final_submission_scaled = pd.DataFrame(columns=["product_id", "tn"])
final_submission_no_scaled = pd.DataFrame(columns=["product_id", "tn"])
for brand, result in kaggle_results.items():
    submission_normal = result["submission"]
    submission_alpha = result["submission_alpha"]
    final_submission_no_scaled = pd.concat([final_submission_no_scaled, submission_normal], ignore_index=True)
    final_submission_scaled = pd.concat([final_submission_scaled, submission_alpha], ignore_index=True)



In [11]:
final_submission_no_scaled

Unnamed: 0,product_id,tn
0,20001,1345.738387
1,20045,170.647413
2,20102,11.787400
3,20135,137.287058
4,20150,38.643878
...,...,...
775,21248,-0.087762
776,21256,-0.097195
777,21259,-0.083637
778,21262,-0.096032


In [12]:
final_submission_scaled

Unnamed: 0,product_id,tn
0,20001,1133.218058
1,20045,143.698605
2,20102,9.925922
3,20135,115.606551
4,20150,32.541199
...,...,...
775,21248,0.000000
776,21256,0.000000
777,21259,0.000000
778,21262,0.000000


In [14]:
# pasar final_submission_no_scaled a menor a 0 a 0
final_submission_no_scaled["tn"] = final_submission_no_scaled["tn"].apply(lambda x: max(x, 0))


In [15]:
final_submission_scaled["tn"] = final_submission_scaled["tn"].apply(lambda x: max(x, 0))

In [16]:
final_submission_no_scaled.to_csv("submission_no_scaled.csv", index=False)
final_submission_scaled.to_csv("submission_scaled.csv", index=False)