In [1]:
import pandas as pd

# Cargar sell-in.txt (puede ser un archivo grande, leer solo columnas necesarias)
sellin_cols = ['periodo', 'customer_id', 'product_id', 'plan_precios_cuidados', 'cust_request_qty', 'cust_request_tn', 'tn']
df_sellin = pd.read_csv('sell-in.txt', sep='\t', usecols=sellin_cols)
df_sellin.head()

Unnamed: 0,periodo,customer_id,product_id,plan_precios_cuidados,cust_request_qty,cust_request_tn,tn
0,201701,10234,20524,0,2,0.053,0.053
1,201701,10032,20524,0,1,0.13628,0.13628
2,201701,10217,20524,0,1,0.03028,0.03028
3,201701,10125,20524,0,1,0.02271,0.02271
4,201701,10012,20524,0,11,1.54452,1.54452


In [2]:
# Contar valores únicos de customer_id
df_sellin['customer_id'].nunique()
# Contar valores únicos de product_id
#df_sellin['product_id'].nunique()
# Contar valores únicos de periodo
#df_sellin['periodo'].nunique()


597

In [3]:
# Si 'periodo' es tipo string o int, conviértelo a datetime para mayor facilidad
df_sellin['periodo'] = pd.to_datetime(df_sellin['periodo'], format='%Y%m')
df_sellin= df_sellin.sort_values(['product_id', 'customer_id', 'periodo']).reset_index(drop=True)


In [4]:
# Agrupo por 'product_id' y período, y calculo la suma de 'cust_request_qty' y 'tn'
df_agg = df_sellin.groupby(['product_id', 'periodo']).agg({'cust_request_qty': 'sum','tn': 'sum'}).reset_index()   
df_agg.tail() 

Unnamed: 0,product_id,periodo,cust_request_qty,tn
31238,21295,2017-01-01,1,0.00699
31239,21296,2017-08-01,1,0.00651
31240,21297,2017-01-01,1,0.00579
31241,21298,2017-08-01,1,0.00573
31242,21299,2017-08-01,1,0.00546


Entrenamiento con AutoGluon

Preparar los datos de entrenamiento y test
Entrenamiento: Usa todos los datos donde tn_t_plus_2 no es NaN y el período es menor a 201912 (para no usar datos del futuro).
Test: Filtra las filas donde el período es 201912 (diciembre 2019), ya que para esas filas queremos predecir tn en 202002 (febrero 2020).

Training con AutoGluon

In [5]:
df_agg = df_agg.rename(columns={
    'product_id': 'item_id',
    'periodo': 'timestamp'
})

print(df_agg.columns.tolist())

['item_id', 'timestamp', 'cust_request_qty', 'tn']


In [None]:
# Tomo datos hasta octubre de 2019
df_201910 = df_agg[df_agg['timestamp'] <= '2019-10-01']


(31243, 4)

In [22]:
import warnings
warnings.filterwarnings("ignore")
from autogluon.timeseries import TimeSeriesDataFrame, TimeSeriesPredictor



# Entrenar el modelo con AutoGluon
predictor = TimeSeriesPredictor(
    target='tn', 
    prediction_length=4,
    freq='MS',
    eval_metric='WQL'
).fit(
    train_data=df_201910, 
    num_val_windows=2,
    val_step_size=1
)



Beginning AutoGluon training...
AutoGluon will save models to '/Users/fernandopedroarena/Documents/Documents/LABO III/AutogluonModels/ag-20250706_224430'
AutoGluon Version:  1.3.1
Python Version:     3.10.11
Operating System:   Darwin
Platform Machine:   arm64
Platform Version:   Darwin Kernel Version 24.5.0: Tue Apr 22 19:54:43 PDT 2025; root:xnu-11417.121.6~2/RELEASE_ARM64_T8132
CPU Count:          10
GPU Count:          0
Memory Avail:       3.90 GB / 16.00 GB (24.4%)
Disk Space Avail:   107.86 GB / 228.27 GB (47.3%)

Fitting with arguments:
{'enable_ensemble': True,
 'eval_metric': WQL,
 'freq': 'MS',
 'hyperparameters': 'default',
 'known_covariates_names': [],
 'num_val_windows': 2,
 'prediction_length': 4,
 'quantile_levels': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
 'random_seed': 123,
 'refit_every_n_windows': 1,
 'refit_full': False,
 'skip_model_selection': False,
 'target': 'tn',
 'val_step_size': 1,
 'verbosity': 2}

train_data with frequency 'IRREG' has been resampl

Monitoring

In [7]:
# Entender la contribución de cada modelo

predictor.leaderboard()


Unnamed: 0,model,score_val,pred_time_val,fit_time_marginal,fit_order
0,WeightedEnsemble,-0.234701,4.588336,1.19065,13
1,ChronosFineTuned[bolt_small],-0.243893,0.685523,338.523998,8
2,TemporalFusionTransformer,-0.250849,0.378538,233.937389,9
3,PatchTST,-0.251168,0.171898,48.146167,11
4,ChronosZeroShot[bolt_base],-0.252908,2.572163,6.66495,7
5,DeepAR,-0.254025,0.602808,71.264351,10
6,AutoETS,-0.254848,2.402062,2.282129,6
7,DynamicOptimizedTheta,-0.25603,0.315581,1.355168,5
8,RecursiveTabular,-0.256607,0.031926,1.994364,2
9,TiDE,-0.262903,0.429968,123.705486,12


In [8]:
# Instalar bokeh
#pip install bokeh
predictor.fit_summary()

****************** Summary of fit() ******************
Estimated performance of each model:
                           model  score_val  pred_time_val  fit_time_marginal  \
0               WeightedEnsemble  -0.234701       4.588336           1.190650   
1   ChronosFineTuned[bolt_small]  -0.243893       0.685523         338.523998   
2      TemporalFusionTransformer  -0.250849       0.378538         233.937389   
3                       PatchTST  -0.251168       0.171898          48.146167   
4     ChronosZeroShot[bolt_base]  -0.252908       2.572163           6.664950   
5                         DeepAR  -0.254025       0.602808          71.264351   
6                        AutoETS  -0.254848       2.402062           2.282129   
7          DynamicOptimizedTheta  -0.256030       0.315581           1.355168   
8               RecursiveTabular  -0.256607       0.031926           1.994364   
9                           TiDE  -0.262903       0.429968         123.705486   
10               

{'model_types': {'SeasonalNaive': 'MultiWindowBacktestingModel',
  'RecursiveTabular': 'MultiWindowBacktestingModel',
  'DirectTabular': 'MultiWindowBacktestingModel',
  'NPTS': 'MultiWindowBacktestingModel',
  'DynamicOptimizedTheta': 'MultiWindowBacktestingModel',
  'AutoETS': 'MultiWindowBacktestingModel',
  'ChronosZeroShot[bolt_base]': 'MultiWindowBacktestingModel',
  'ChronosFineTuned[bolt_small]': 'MultiWindowBacktestingModel',
  'TemporalFusionTransformer': 'MultiWindowBacktestingModel',
  'DeepAR': 'MultiWindowBacktestingModel',
  'PatchTST': 'MultiWindowBacktestingModel',
  'TiDE': 'MultiWindowBacktestingModel',
  'WeightedEnsemble': 'GreedyEnsemble'},
 'model_performance': {'SeasonalNaive': -0.3293169775463332,
  'RecursiveTabular': -0.25660720047335017,
  'DirectTabular': -0.2983208647538319,
  'NPTS': -0.4107290137947305,
  'DynamicOptimizedTheta': -0.25603044164176847,
  'AutoETS': -0.25484778031630073,
  'ChronosZeroShot[bolt_base]': -0.2529083972566761,
  'ChronosFineTu

In [9]:
predictor.feature_importance()

Computing feature importance


Unnamed: 0,importance,stdev,n,p99_low,p99_high
cust_request_qty,-0.000539,0.001288,5.0,-0.00319,0.002113


### Predicción Feb. 2020

In [23]:
predictions = predictor.predict(df_201910)

data with frequency 'IRREG' has been resampled to frequency 'MS'.
Model not specified in predict, will default to the model with the best validation score: WeightedEnsemble


In [24]:
# Tomar solo item_id y la predicción 'mean'
# Filtrar solo febrero 2020
resultado = predictions['mean'].reset_index()
resultado = resultado[resultado['timestamp'] == '2020-02-01']

# Renombrar columnas
resultado = resultado[['item_id', 'mean']]
resultado.columns = ['product_id', 'tn']
resultado.head()


Unnamed: 0,product_id,tn
3,20001,1307.985231
7,20002,1084.355767
11,20003,798.554385
15,20004,631.774953
19,20005,604.156577


---

#### Predicciones para diciembre 2019 (validación para stacking)

In [25]:
# Tomar solo item_id y la predicción 'mean'
# Filtrar solo diciembre 2019
resultado2 = predictions['mean'].reset_index()
resultado2 = resultado2[resultado2['timestamp'] == '2019-12-01']

# Renombrar columnas
resultado2 = resultado2[['item_id', 'mean']]
resultado2.columns = ['product_id', 'tn']
resultado2.head()

Unnamed: 0,product_id,tn
1,20001,1356.478165
5,20002,1232.46492
9,20003,886.10591
13,20004,718.890832
17,20005,656.209537


In [26]:
# Exportar a CSV 
resultado2.to_csv('ridge_val_autogluon.csv', index=False)

---

Archivo para Kaggle

In [27]:
# Cargar product_id_apredecir201912.txt
df_ids = pd.read_csv('product_id_apredecir201912.txt')
df_ids.head()

# Transformar product_id a string
#df_ids['product_id'] = df_ids['product_id'].astype(str)

Unnamed: 0,product_id
0,20001
1,20002
2,20003
3,20004
4,20005


In [28]:
# Merge para obtener los product_id que se deben predecir
resultado = df_ids.merge(resultado, on='product_id', how='left')
#predictions_v1.shape
resultado.head()

# Exportar a CSV 
resultado.to_csv('submission_AGP.csv', index=False)