In [54]:
# Third-party and standard-library modules used by the notebook.
import pandas as pd
import numpy as np
import importlib
import gc
import sys
import warnings
# Add ./scripts to the import path (presumably where the local modules imported below live).
sys.path.append('./scripts')  
import preprocesamiento
import feature_engineering
import model_autogluon
# Reload the local modules so that edits made on disk are picked up without restarting the kernel.
importlib.reload(preprocesamiento)
importlib.reload(model_autogluon)
importlib.reload(feature_engineering)
# NOTE(review): this silences every warning for the whole session, which can also hide
# deprecation or data-quality warnings raised by the libraries used below.
warnings.filterwarnings("ignore")

# Experimento 6: 
- Autogluon
- 12 kfold ponderados
- Kaggle: el promedio ponderado dio 0.267
- Kaggle: con 12 val_windows dio 0.254
- Tardó 1600 minutos (aprox. 27 horas)


In [55]:
# df = model_autogluon.ensemble_de_ventanasValidacion()

In [56]:
from pathlib import Path

from autogluon.timeseries import TimeSeriesDataFrame, TimeSeriesPredictor

# Resolve the output location first and create its directory, so that a missing
# folder cannot make the final to_csv fail after the long training loop below.
output_path = "./outputs/predicciones_exp_06_autogluon_v1.csv"
Path(output_path).parent.mkdir(parents=True, exist_ok=True)

# Load the raw data.
df = pd.read_csv("./datasets/periodo_x_producto_con_target.csv", sep=',', encoding='utf-8')

# Basic check and cleaning: drop rows that miss any of the key columns.
print("Verificando datos...")
print(f"Filas originales: {len(df)}")
df = df.dropna(subset=['periodo', 'product_id', 'tn'])
print(f"Filas después de limpieza: {len(df)}")

# Aggregate tn per (periodo, product_id) and rename the columns to the
# item_id / timestamp / target layout passed to TimeSeriesDataFrame below.
dfg = df.groupby(['periodo', 'product_id']).agg({'tn': 'sum'}).reset_index()
dfg['periodo_dt'] = pd.to_datetime(dfg['periodo'].astype(str), format='%Y%m')
dfg = dfg.rename(
    columns={'tn': 'target', 'product_id': 'item_id', 'periodo_dt': 'timestamp'}
).drop(columns=['periodo'])

# Keep only the products listed in the "to predict" file.
productos_ok = pd.read_csv('../../data/raw/product_id_apredecir201912.csv', sep=',')
dfg = dfg[dfg['item_id'].isin(productos_ok['product_id'].unique())]
print(f"Productos únicos a predecir: {len(dfg['item_id'].unique())}")

# Convert to TimeSeriesDataFrame, failing early on an empty frame.
if len(dfg) == 0:
    raise ValueError("El DataFrame está vacío después del filtrado")

try:
    data = TimeSeriesDataFrame.from_data_frame(
        dfg,
        id_column="item_id",
        timestamp_column="timestamp"
    )
    print("TimeSeriesDataFrame creado exitosamente")
    print(f"Número de series temporales: {len(data.item_ids)}")
except Exception as e:
    # Chain the original exception so its traceback is kept as the explicit cause.
    raise ValueError(f"Error al crear TimeSeriesDataFrame: {str(e)}") from e

all_predictions = []

# Month-end label of February 2020, the forecast row kept from every run.
target_timestamp = pd.Timestamp("2020-02-29")

for n_windows in range(1, 13):  # one full fit per num_val_windows value, 1 through 12
    print(f"\n--- Entrenamiento con {n_windows} ventana(s) ---")

    try:
        predictor = TimeSeriesPredictor(
            target='target',
            prediction_length=2,
            freq="M",
            eval_metric="MAPE"
        ).fit(
            data,
            num_val_windows=n_windows,
            verbosity=2  # more detailed training logs
        )

        preds = predictor.predict(data)
        print("Predicciones obtenidas exitosamente")

        # Keep only the February 2020 forecast rows.
        preds_202002 = preds.reset_index()
        preds_202002 = preds_202002[preds_202002['timestamp'] == target_timestamp]

        if len(preds_202002) == 0:
            print(f"Advertencia: No hay predicciones para febrero 2020 con {n_windows} ventanas")
            continue

        # One column per run, named after the number of validation windows used.
        preds_202002 = preds_202002[["item_id", "mean"]].rename(columns={
            "item_id": "product_id",
            "mean": f"pred_windows_{n_windows}"
        })

        all_predictions.append(preds_202002.set_index("product_id"))
        print(f"Predicciones para {n_windows} ventanas procesadas")

    except Exception as e:
        # Best effort: a failed run is reported and skipped so the remaining
        # runs still execute; its column will be absent from final_df.
        print(f"Error durante el entrenamiento/predicción: {str(e)}")
        continue

# Final check before consolidating.
if not all_predictions:
    raise ValueError("No se generaron predicciones válidas en ninguna iteración")

print("\nConsolidando resultados...")
# Align every run side by side on product_id.
final_df = pd.concat(all_predictions, axis=1)
print(f"DataFrame consolidado: {final_df.shape}")

# Final result: product_id becomes a regular column, rows ordered by product.
final_df = final_df.reset_index().sort_values('product_id')

# Save to the directory created at the top of the cell.
final_df.to_csv(output_path, index=False)
print(f"\nProceso completado exitosamente. Resultados guardados en: {output_path}")
print(f"Resumen de predicciones:\n{final_df.describe()}")



Frequency 'M' stored as 'ME'
Beginning AutoGluon training...
AutoGluon will save models to 'c:\Users\Usuario\Documents\Universidad\austral\2025\Lab3\Lab3-MCD\notebooks\entregable\AutogluonModels\ag-20250628_163458'


Verificando datos...
Filas originales: 31362
Filas después de limpieza: 31362
Productos únicos a predecir: 780
TimeSeriesDataFrame creado exitosamente
Número de series temporales: 780

--- Entrenamiento con 1 ventana(s) ---


AutoGluon Version:  1.3.1
Python Version:     3.11.13
Operating System:   Windows
Platform Machine:   AMD64
Platform Version:   10.0.26100
CPU Count:          4
GPU Count:          0
Memory Avail:       3.36 GB / 15.89 GB (21.2%)
Disk Space Avail:   414.49 GB / 893.49 GB (46.4%)

Fitting with arguments:
{'enable_ensemble': True,
 'eval_metric': MAPE,
 'freq': 'ME',
 'hyperparameters': 'default',
 'known_covariates_names': [],
 'num_val_windows': 1,
 'prediction_length': 2,
 'quantile_levels': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
 'random_seed': 123,
 'refit_every_n_windows': 1,
 'refit_full': False,
 'skip_model_selection': False,
 'target': 'target',
 'verbosity': 2}

train_data with frequency 'MS' has been resampled to frequency 'ME'.
Provided train_data has 22375 rows, 780 time series. Median time series length is 36 (min=4, max=36). 
	Removing 46 short time series from train_data. Only series with length >= 7 will be used for training.
	After filtering, train_data has 221

Predicciones obtenidas exitosamente
Predicciones para 1 ventanas procesadas

--- Entrenamiento con 2 ventana(s) ---


train_data with frequency 'MS' has been resampled to frequency 'ME'.
Provided train_data has 22375 rows, 780 time series. Median time series length is 36 (min=4, max=36). 
	Removing 75 short time series from train_data. Only series with length >= 9 will be used for training.
	After filtering, train_data has 21916 rows, 705 time series. Median time series length is 36 (min=9, max=36). 

Provided data contains following columns:
	target: 'target'

AutoGluon will gauge predictive performance using evaluation metric: 'MAPE'
	This metric's sign has been flipped to adhere to being higher_is_better. The metric score can be multiplied by -1 to get the metric value.

Starting training. Start time is 2025-06-28 13:48:54
Models that will be trained: ['SeasonalNaive', 'RecursiveTabular', 'DirectTabular', 'NPTS', 'DynamicOptimizedTheta', 'AutoETS', 'ChronosZeroShot[bolt_base]', 'ChronosFineTuned[bolt_small]', 'TemporalFusionTransformer', 'DeepAR', 'PatchTST', 'TiDE']
Training timeseries model Seaso

Predicciones obtenidas exitosamente
Predicciones para 2 ventanas procesadas

--- Entrenamiento con 3 ventana(s) ---


train_data with frequency 'MS' has been resampled to frequency 'ME'.
Provided train_data has 22375 rows, 780 time series. Median time series length is 36 (min=4, max=36). 
	Removing 120 short time series from train_data. Only series with length >= 11 will be used for training.
	After filtering, train_data has 21496 rows, 660 time series. Median time series length is 36 (min=11, max=36). 

Provided data contains following columns:
	target: 'target'

AutoGluon will gauge predictive performance using evaluation metric: 'MAPE'
	This metric's sign has been flipped to adhere to being higher_is_better. The metric score can be multiplied by -1 to get the metric value.

Starting training. Start time is 2025-06-28 14:18:43
Models that will be trained: ['SeasonalNaive', 'RecursiveTabular', 'DirectTabular', 'NPTS', 'DynamicOptimizedTheta', 'AutoETS', 'ChronosZeroShot[bolt_base]', 'ChronosFineTuned[bolt_small]', 'TemporalFusionTransformer', 'DeepAR', 'PatchTST', 'TiDE']
Training timeseries model Se

Predicciones obtenidas exitosamente
Predicciones para 3 ventanas procesadas

--- Entrenamiento con 4 ventana(s) ---


train_data with frequency 'MS' has been resampled to frequency 'ME'.
Provided train_data has 22375 rows, 780 time series. Median time series length is 36 (min=4, max=36). 
	Removing 124 short time series from train_data. Only series with length >= 13 will be used for training.
	After filtering, train_data has 21452 rows, 656 time series. Median time series length is 36 (min=14, max=36). 

Provided data contains following columns:
	target: 'target'

AutoGluon will gauge predictive performance using evaluation metric: 'MAPE'
	This metric's sign has been flipped to adhere to being higher_is_better. The metric score can be multiplied by -1 to get the metric value.

Starting training. Start time is 2025-06-28 15:16:52
Models that will be trained: ['SeasonalNaive', 'RecursiveTabular', 'DirectTabular', 'NPTS', 'DynamicOptimizedTheta', 'AutoETS', 'ChronosZeroShot[bolt_base]', 'ChronosFineTuned[bolt_small]', 'TemporalFusionTransformer', 'DeepAR', 'PatchTST', 'TiDE']
Training timeseries model Se

Predicciones obtenidas exitosamente
Predicciones para 4 ventanas procesadas

--- Entrenamiento con 5 ventana(s) ---


train_data with frequency 'MS' has been resampled to frequency 'ME'.
Provided train_data has 22375 rows, 780 time series. Median time series length is 36 (min=4, max=36). 
	Removing 134 short time series from train_data. Only series with length >= 15 will be used for training.
	After filtering, train_data has 21312 rows, 646 time series. Median time series length is 36 (min=15, max=36). 

Provided data contains following columns:
	target: 'target'

AutoGluon will gauge predictive performance using evaluation metric: 'MAPE'
	This metric's sign has been flipped to adhere to being higher_is_better. The metric score can be multiplied by -1 to get the metric value.

Starting training. Start time is 2025-06-28 16:35:49
Models that will be trained: ['SeasonalNaive', 'RecursiveTabular', 'DirectTabular', 'NPTS', 'DynamicOptimizedTheta', 'AutoETS', 'ChronosZeroShot[bolt_base]', 'ChronosFineTuned[bolt_small]', 'TemporalFusionTransformer', 'DeepAR', 'PatchTST', 'TiDE']
Training timeseries model Se

Predicciones obtenidas exitosamente
Predicciones para 5 ventanas procesadas

--- Entrenamiento con 6 ventana(s) ---


train_data with frequency 'MS' has been resampled to frequency 'ME'.
Provided train_data has 22375 rows, 780 time series. Median time series length is 36 (min=4, max=36). 
	Removing 172 short time series from train_data. Only series with length >= 17 will be used for training.
	After filtering, train_data has 20723 rows, 608 time series. Median time series length is 36 (min=17, max=36). 

Provided data contains following columns:
	target: 'target'

AutoGluon will gauge predictive performance using evaluation metric: 'MAPE'
	This metric's sign has been flipped to adhere to being higher_is_better. The metric score can be multiplied by -1 to get the metric value.

Starting training. Start time is 2025-06-28 17:55:57
Models that will be trained: ['SeasonalNaive', 'RecursiveTabular', 'DirectTabular', 'NPTS', 'DynamicOptimizedTheta', 'AutoETS', 'ChronosZeroShot[bolt_base]', 'ChronosFineTuned[bolt_small]', 'TemporalFusionTransformer', 'DeepAR', 'PatchTST', 'TiDE']
Training timeseries model Se

Predicciones obtenidas exitosamente
Predicciones para 6 ventanas procesadas

--- Entrenamiento con 7 ventana(s) ---


train_data with frequency 'MS' has been resampled to frequency 'ME'.
Provided train_data has 22375 rows, 780 time series. Median time series length is 36 (min=4, max=36). 
	Removing 181 short time series from train_data. Only series with length >= 19 will be used for training.
	After filtering, train_data has 20566 rows, 599 time series. Median time series length is 36 (min=20, max=36). 

Provided data contains following columns:
	target: 'target'

AutoGluon will gauge predictive performance using evaluation metric: 'MAPE'
	This metric's sign has been flipped to adhere to being higher_is_better. The metric score can be multiplied by -1 to get the metric value.

Starting training. Start time is 2025-06-28 19:29:48
Models that will be trained: ['SeasonalNaive', 'RecursiveTabular', 'DirectTabular', 'NPTS', 'DynamicOptimizedTheta', 'AutoETS', 'ChronosZeroShot[bolt_base]', 'ChronosFineTuned[bolt_small]', 'TemporalFusionTransformer', 'DeepAR', 'PatchTST', 'TiDE']
Training timeseries model Se

Predicciones obtenidas exitosamente
Predicciones para 7 ventanas procesadas

--- Entrenamiento con 8 ventana(s) ---


train_data with frequency 'MS' has been resampled to frequency 'ME'.
Provided train_data has 22375 rows, 780 time series. Median time series length is 36 (min=4, max=36). 
	Removing 193 short time series from train_data. Only series with length >= 21 will be used for training.
	After filtering, train_data has 20326 rows, 587 time series. Median time series length is 36 (min=21, max=36). 

Provided data contains following columns:
	target: 'target'

AutoGluon will gauge predictive performance using evaluation metric: 'MAPE'
	This metric's sign has been flipped to adhere to being higher_is_better. The metric score can be multiplied by -1 to get the metric value.

Starting training. Start time is 2025-06-28 21:26:13
Models that will be trained: ['SeasonalNaive', 'RecursiveTabular', 'DirectTabular', 'NPTS', 'DynamicOptimizedTheta', 'AutoETS', 'ChronosZeroShot[bolt_base]', 'ChronosFineTuned[bolt_small]', 'TemporalFusionTransformer', 'DeepAR', 'PatchTST', 'TiDE']
Training timeseries model Se

Predicciones obtenidas exitosamente
Predicciones para 8 ventanas procesadas

--- Entrenamiento con 9 ventana(s) ---


train_data with frequency 'MS' has been resampled to frequency 'ME'.
Provided train_data has 22375 rows, 780 time series. Median time series length is 36 (min=4, max=36). 
	Removing 211 short time series from train_data. Only series with length >= 23 will be used for training.
	After filtering, train_data has 19942 rows, 569 time series. Median time series length is 36 (min=23, max=36). 

Provided data contains following columns:
	target: 'target'

AutoGluon will gauge predictive performance using evaluation metric: 'MAPE'
	This metric's sign has been flipped to adhere to being higher_is_better. The metric score can be multiplied by -1 to get the metric value.

Starting training. Start time is 2025-06-28 23:36:11
Models that will be trained: ['SeasonalNaive', 'RecursiveTabular', 'DirectTabular', 'NPTS', 'DynamicOptimizedTheta', 'AutoETS', 'ChronosZeroShot[bolt_base]', 'ChronosFineTuned[bolt_small]', 'TemporalFusionTransformer', 'DeepAR', 'PatchTST', 'TiDE']
Training timeseries model Se

Predicciones obtenidas exitosamente
Predicciones para 9 ventanas procesadas

--- Entrenamiento con 10 ventana(s) ---


train_data with frequency 'MS' has been resampled to frequency 'ME'.
Provided train_data has 22375 rows, 780 time series. Median time series length is 36 (min=4, max=36). 
	Removing 216 short time series from train_data. Only series with length >= 25 will be used for training.
	After filtering, train_data has 19823 rows, 564 time series. Median time series length is 36 (min=26, max=36). 

Provided data contains following columns:
	target: 'target'

AutoGluon will gauge predictive performance using evaluation metric: 'MAPE'
	This metric's sign has been flipped to adhere to being higher_is_better. The metric score can be multiplied by -1 to get the metric value.

Starting training. Start time is 2025-06-29 01:49:49
Models that will be trained: ['SeasonalNaive', 'RecursiveTabular', 'DirectTabular', 'NPTS', 'DynamicOptimizedTheta', 'AutoETS', 'ChronosZeroShot[bolt_base]', 'ChronosFineTuned[bolt_small]', 'TemporalFusionTransformer', 'DeepAR', 'PatchTST', 'TiDE']
Training timeseries model Se

Predicciones obtenidas exitosamente
Predicciones para 10 ventanas procesadas

--- Entrenamiento con 11 ventana(s) ---


train_data with frequency 'MS' has been resampled to frequency 'ME'.
Provided train_data has 22375 rows, 780 time series. Median time series length is 36 (min=4, max=36). 
	Removing 231 short time series from train_data. Only series with length >= 27 will be used for training.
	After filtering, train_data has 19433 rows, 549 time series. Median time series length is 36 (min=27, max=36). 

Provided data contains following columns:
	target: 'target'

AutoGluon will gauge predictive performance using evaluation metric: 'MAPE'
	This metric's sign has been flipped to adhere to being higher_is_better. The metric score can be multiplied by -1 to get the metric value.

Starting training. Start time is 2025-06-29 04:43:40
Models that will be trained: ['SeasonalNaive', 'RecursiveTabular', 'DirectTabular', 'NPTS', 'DynamicOptimizedTheta', 'AutoETS', 'ChronosZeroShot[bolt_base]', 'ChronosFineTuned[bolt_small]', 'TemporalFusionTransformer', 'DeepAR', 'PatchTST', 'TiDE']
Training timeseries model Se

Predicciones obtenidas exitosamente
Predicciones para 11 ventanas procesadas

--- Entrenamiento con 12 ventana(s) ---


train_data with frequency 'MS' has been resampled to frequency 'ME'.
Provided train_data has 22375 rows, 780 time series. Median time series length is 36 (min=4, max=36). 
	Removing 250 short time series from train_data. Only series with length >= 29 will be used for training.
	After filtering, train_data has 18914 rows, 530 time series. Median time series length is 36 (min=29, max=36). 

Provided data contains following columns:
	target: 'target'

AutoGluon will gauge predictive performance using evaluation metric: 'MAPE'
	This metric's sign has been flipped to adhere to being higher_is_better. The metric score can be multiplied by -1 to get the metric value.

Starting training. Start time is 2025-06-29 12:11:01
Models that will be trained: ['SeasonalNaive', 'RecursiveTabular', 'DirectTabular', 'NPTS', 'DynamicOptimizedTheta', 'AutoETS', 'ChronosZeroShot[bolt_base]', 'ChronosFineTuned[bolt_small]', 'TemporalFusionTransformer', 'DeepAR', 'PatchTST', 'TiDE']
Training timeseries model Se

Predicciones obtenidas exitosamente
Predicciones para 12 ventanas procesadas

Consolidando resultados...
DataFrame consolidado: (780, 12)

Proceso completado exitosamente. Resultados guardados en: ./outputs/predicciones_exp_06_autogluon_v1.csv
Resumen de predicciones:
         product_id  pred_windows_1  pred_windows_2  pred_windows_3  \
count    780.000000      780.000000      780.000000      780.000000   
mean   20541.421795       36.721207       36.145354       31.146484   
std      353.984342       95.071766       92.591823       81.439107   
min    20001.000000        0.018760        0.016950        0.001067   
25%    20238.750000        2.118950        2.114841        1.708416   
50%    20511.500000        9.027587        8.939019        7.640506   
75%    20818.500000       25.363452       25.613850       22.256564   
max    21276.000000     1324.159347     1301.416123     1157.635832   

       pred_windows_4  pred_windows_5  pred_windows_6  pred_windows_7  \
count      780.000

In [57]:
# Write the consolidated per-window predictions to their own CSV file.
output_path = "./outputs/predicciones_exp_06_autogluon_v1_original.csv"
final_df.to_csv(path_or_buf=output_path, index=False)

In [58]:
# 1. Relative weights: the prediction obtained with k validation windows gets weight k (k = 1..12).
weights = pd.Series({f'pred_windows_{k}': k for k in range(1, 13)})

# Fail with an explicit message if any per-window prediction column is missing.
missing_cols = [col for col in weights.index if col not in final_df.columns]
if missing_cols:
    raise KeyError(f"Faltan columnas de predicción en final_df: {missing_cols}")

# 2. Normalising constant, computed once: 1 + 2 + ... + 12 = 78.
suma_pesos = weights.sum()

# 3. Weighted mean per product, computed for all rows at once.
#    DataFrame.dot keeps NaN propagation: a product missing any of the
#    predictions gets NaN, exactly as the explicit sum of products would.
final_df['pred_ponderado'] = final_df[weights.index].dot(weights) / suma_pesos

In [59]:
# Renumber the rows without materialising the old index as a column: a plain
# reset_index() would add a spurious 'index' column to final_df and to the CSV,
# and yet another one ('level_0') every time this cell is re-run.
final_df = final_df.reset_index(drop=True)
output_path = "./outputs/predicciones_exp_06_autogluon_v1_ponderado.csv"
final_df.to_csv(output_path, index=False)

In [60]:
# final_df_copy.to_csv("./outputs/predicciones_exp_06_autogluon_v1_ponderado.csv", index=False)

In [62]:
# Keep only the product id and its weighted prediction, then display the
# products whose weighted prediction is negative.
df_ponderado = final_df.loc[:, ['product_id', 'pred_ponderado']].copy()
df_ponderado.loc[df_ponderado['pred_ponderado'] < 0]

Unnamed: 0,product_id,pred_ponderado
766,21233,-0.001476
767,21244,-0.007695
768,21245,-0.025902
769,21246,-0.00548
770,21248,-0.008186
772,21256,-0.012463
773,21259,-0.010841
774,21262,-0.007848
775,21263,-0.009546


In [63]:
# Keep only the product id and the 12-window prediction, then display the
# products whose 12-window prediction is negative.
df_12kfold = final_df.loc[:, ['product_id', 'pred_windows_12']].copy()
df_12kfold.loc[df_12kfold['pred_windows_12'] < 0]

Unnamed: 0,product_id,pred_windows_12


In [64]:
# 1. Bring the 12-window prediction next to the weighted one.
df_actualizado = df_ponderado.merge(
    df_12kfold[["product_id", "pred_windows_12"]],
    on="product_id",
    how="left",  # keep every row of df_ponderado
)

# 2. Wherever the weighted prediction is negative, use the 12-window prediction instead.
#    Series.mask is vectorised and, unlike a row-wise apply, also works on an empty frame.
df_actualizado["pred_ponderado"] = df_actualizado["pred_ponderado"].mask(
    df_actualizado["pred_ponderado"] < 0,
    df_actualizado["pred_windows_12"],
)

# 3. Drop the helper column and display any prediction that is still negative.
df_actualizado = df_actualizado.drop(columns=["pred_windows_12"])
df_actualizado[df_actualizado['pred_ponderado'] < 0]

Unnamed: 0,product_id,pred_ponderado


In [None]:
# Write the corrected weighted predictions (the original call was missing the
# comma between sep=',' and index=False, which is a SyntaxError).
df_actualizado.to_csv("./outputs/predicciones_exp_06_autogluon_v1_ponderado.csv", sep=',', index=False)

In [67]:
# Write the 12-window predictions to CSV without the row index.
df_12kfold.to_csv(
    "./outputs/predicciones_exp_06_autogluon_v1_12kfold.csv",
    index=False,
    sep=',',
)

In [73]:
df_5kfold = final_df.copy()
df_5kfold = df_5kfold[['product_id', 'pred_windows_5']]

# 1. Bring the 12-window prediction next to the 5-window one.
df_actualizado = df_5kfold.merge(
    df_12kfold[["product_id", "pred_windows_12"]],
    on="product_id",
    how="left",  # keep every row of df_5kfold
)

# 2. Wherever the 5-window prediction is negative, use the 12-window prediction instead.
#    Series.mask is vectorised and, unlike a row-wise apply, also works on an empty frame.
df_actualizado["pred_windows_5"] = df_actualizado["pred_windows_5"].mask(
    df_actualizado["pred_windows_5"] < 0,
    df_actualizado["pred_windows_12"],
)

# 3. Drop the helper column and display any prediction that is still negative.
df_actualizado = df_actualizado.drop(columns=["pred_windows_12"])
df_actualizado[df_actualizado['pred_windows_5'] < 0]

Unnamed: 0,product_id,pred_windows_5


In [77]:
# Write the corrected 5-window predictions to CSV without the row index.
df_actualizado.to_csv(
    "./outputs/predicciones_exp_06_autogluon_v1_5kfold.csv",
    index=False,
    sep=',',
)

In [80]:
df_10kfold = final_df.copy()
df_10kfold = df_10kfold[['product_id', 'pred_windows_10']]

# 1. Bring the 12-window prediction next to the 10-window one.
df_actualizado = df_10kfold.merge(
    df_12kfold[["product_id", "pred_windows_12"]],
    on="product_id",
    how="left",  # keep every row of df_10kfold
)

# 2. Wherever the 10-window prediction is negative, use the 12-window prediction instead.
#    Series.mask is vectorised and, unlike a row-wise apply, also works on an empty frame.
df_actualizado["pred_windows_10"] = df_actualizado["pred_windows_10"].mask(
    df_actualizado["pred_windows_10"] < 0,
    df_actualizado["pred_windows_12"],
)

# 3. Drop the helper column and display any prediction that is still negative.
df_actualizado = df_actualizado.drop(columns=["pred_windows_12"])
df_actualizado[df_actualizado['pred_windows_10'] < 0]

Unnamed: 0,product_id,pred_windows_10


In [81]:
# Rename the prediction column to 'tn' and write the 10-window predictions to CSV.
df_actualizado = df_actualizado.rename(columns={'pred_windows_10': 'tn'})
df_actualizado.to_csv(
    "./outputs/predicciones_exp_06_autogluon_v1_10kfold.csv",
    index=False,
    sep=',',
)