In [None]:
# === BLOQUE 3 - Autogluon ===

import pandas as pd
%pip install autogluon.timeseries
from autogluon.timeseries import TimeSeriesPredictor, TimeSeriesDataFrame
import os

#  Rutas reutilizando INTERMEDIOS
BASE_DATOS     = r'C:\Users\Elisabeth\Desktop\MAESTRIA_AUSTRAL\Labo_III\labo3-2025v\datasets'
INTERMEDIOS   = r'C:\Users\Elisabeth\Desktop\MAESTRIA_AUSTRAL\Labo_III\labo3-2025v\entrega_final\intermedios'
BASE_OUTPUTS   = r'C:\Users\Elisabeth\Desktop\MAESTRIA_AUSTRAL\Labo_III\labo3-2025v\entrega_final\output'

# Crear carpetas si no existen
os.makedirs(INTERMEDIOS, exist_ok=True)
os.makedirs(BASE_OUTPUTS, exist_ok=True)

PRED_PATH = os.path.join(BASE_OUTPUTS,'pred_modelo_autogluon.csv')


#  Carga de archivos desde Drive
print("🔄 Cargando archivos desde Drive...")


productos_pred = pd.read_csv(os.path.join(BASE_DATOS, "productos_pred.txt"), sep="\t")
df_sellin             = pd.read_csv(os.path.join(BASE_DATOS, "sell-in.txt"),sep="\t")
df_productos      = pd.read_csv(os.path.join(BASE_DATOS, "tb_productos.txt"),   sep="\t")




# 📄 Leer lista de productos a predecir
# 1. Asegúrate de que sean enteros
productos_pred['product_id'] = productos_pred['product_id'].astype(int)

# 2. Extrae la lista
product_ids = productos_pred['product_id'].tolist()

# 🧹 3. Preprocesamiento
# Convertir periodo a datetime
df_sellin['timestamp'] = pd.to_datetime(df_sellin['periodo'], format='%Y%m')

# Filtrar hasta dic 2019 y productos requeridos
df_filtered = df_sellin[
    (df_sellin['timestamp'] <= '2019-12-01') &
    (df_sellin['product_id'].isin(product_ids))
]

# Agregar tn por periodo, cliente y producto
df_grouped = df_filtered.groupby(['timestamp', 'customer_id', 'product_id'], as_index=False)['tn'].sum()

# Agregar tn total por periodo y producto
df_monthly_product = df_grouped.groupby(['timestamp', 'product_id'], as_index=False)['tn'].sum()

# Agregar columna 'item_id' para AutoGluon
df_monthly_product['item_id'] = df_monthly_product['product_id']

# ⏰ 4. Crear TimeSeriesDataFrame
ts_data = TimeSeriesDataFrame.from_data_frame(
    df_monthly_product,
    id_column='item_id',
    timestamp_column='timestamp'
)

# Completar valores faltantes
ts_data = ts_data.fill_missing_values()

# ⚙️ 5. Definir y entrenar predictor
predictor = TimeSeriesPredictor(
    prediction_length=2,
    target='tn',
    freq='MS'  # Frecuencia mensual (Month Start),
)

predictor.fit(ts_data, num_val_windows=2, time_limit=60*60)

# 🔮 6. Generar predicción
forecast = predictor.predict(ts_data)

# Extraer predicción media y filtrar febrero 2020
forecast_mean = forecast['mean'].reset_index()
print(forecast_mean.columns)


# Tomar solo item_id y la predicción 'mean'
resultado = forecast['mean'].reset_index()[['item_id', 'mean']]
resultado.columns = ['product_id', 'tn']

# Filtrar solo febrero 2020
resultado = forecast['mean'].reset_index()
resultado = resultado[resultado['timestamp'] == '2020-02-01']#Colocar mes a predecir '2020-02-01'

# Renombrar columnas
resultado = resultado[['item_id', 'mean']]
resultado.columns = ['product_id', 'tn']


# === 8. Guardar en CSV usando PRED_PATH ===
resultado.to_csv(PRED_PATH, index=False)
print(f"✅ Guardado en: {PRED_PATH}")


Collecting autogluon.timeseriesNote: you may need to restart the kernel to use updated packages.

  Downloading autogluon.timeseries-1.3.1-py3-none-any.whl.metadata (12 kB)
Collecting torch<2.7,>=2.2 (from autogluon.timeseries)
  Downloading torch-2.6.0-cp311-cp311-win_amd64.whl.metadata (28 kB)
Collecting lightning<2.7,>=2.2 (from autogluon.timeseries)
  Downloading lightning-2.5.2-py3-none-any.whl.metadata (38 kB)
Collecting pytorch-lightning (from autogluon.timeseries)
  Downloading pytorch_lightning-2.5.2-py3-none-any.whl.metadata (21 kB)
Collecting transformers<4.50,>=4.38.0 (from transformers[sentencepiece]<4.50,>=4.38.0->autogluon.timeseries)
  Downloading transformers-4.49.0-py3-none-any.whl.metadata (44 kB)
     ---------------------------------------- 0.0/44.0 kB ? eta -:--:--
     ---------------------------------------- 44.0/44.0 kB 2.1 MB/s eta 0:00:00
Collecting accelerate<2.0,>=0.34.0 (from autogluon.timeseries)
  Downloading accelerate-1.9.0-py3-none-any.whl.metadata (1

  You can safely remove it manually.
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
aiobotocore 2.7.0 requires botocore<1.31.65,>=1.31.16, but you have botocore 1.39.8 which is incompatible.


🔄 Cargando archivos desde Drive...


Beginning AutoGluon training... Time limit = 3600s
AutoGluon will save models to 'c:\Users\Elisabeth\Desktop\MAESTRIA_AUSTRAL\Labo_III\labo3-2025v\entrega_final\AutogluonModels\ag-20250717_224310'
AutoGluon Version:  1.3.1
Python Version:     3.11.7
Operating System:   Windows
Platform Machine:   AMD64
Platform Version:   10.0.19045
CPU Count:          4
GPU Count:          0
Memory Avail:       0.59 GB / 7.91 GB (7.5%)
Disk Space Avail:   23.67 GB / 222.93 GB (10.6%)

Fitting with arguments:
{'enable_ensemble': True,
 'eval_metric': WQL,
 'freq': 'MS',
 'hyperparameters': 'default',
 'known_covariates_names': [],
 'num_val_windows': 2,
 'prediction_length': 2,
 'quantile_levels': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
 'random_seed': 123,
 'refit_every_n_windows': 1,
 'refit_full': False,
 'skip_model_selection': False,
 'target': 'tn',
 'time_limit': 3600,
 'verbosity': 2}

train_data with frequency 'IRREG' has been resampled to frequency 'MS'.
Provided train_data has 22375 r

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/821M [00:00<?, ?B/s]

	-0.1905       = Validation score (-WQL)
	170.81  s     = Training runtime
	12.80   s     = Validation (prediction) runtime
Training timeseries model ChronosFineTuned[bolt_small]. Training for up to 543.8s of the 3262.6s of remaining time.
	Skipping covariate_regressor since the dataset contains no covariates or static features.


config.json: 0.00B [00:00, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


model.safetensors:   0%|          | 0.00/191M [00:00<?, ?B/s]

	Fine-tuning on the CPU detected. We recommend using a GPU for faster fine-tuning of Chronos.
	Saving fine-tuned model to c:\Users\Elisabeth\Desktop\MAESTRIA_AUSTRAL\Labo_III\labo3-2025v\entrega_final\AutogluonModels\ag-20250717_224310\models\ChronosFineTuned[bolt_small]\W0\fine-tuned-ckpt
	Skipping covariate_regressor since the dataset contains no covariates or static features.
	Fine-tuning on the CPU detected. We recommend using a GPU for faster fine-tuning of Chronos.
	Saving fine-tuned model to c:\Users\Elisabeth\Desktop\MAESTRIA_AUSTRAL\Labo_III\labo3-2025v\entrega_final\AutogluonModels\ag-20250717_224310\models\ChronosFineTuned[bolt_small]\W1\fine-tuned-ckpt
	-0.1823       = Validation score (-WQL)
	500.33  s     = Training runtime
	3.21    s     = Validation (prediction) runtime
Training timeseries model TemporalFusionTransformer. Training for up to 551.8s of the 2759.1s of remaining time.
	-0.1925       = Validation score (-WQL)
	524.79  s     = Training runtime
	1.45    s     

Index(['item_id', 'timestamp', 'mean'], dtype='object')


ValueError: Invalid file path or buffer object type: <class 'tuple'>

In [7]:
# === 8. Guardar en CSV usando PRED_PATH ===
resultado.to_csv(PRED_PATH, index=False)
print(f"✅ Guardado en: {PRED_PATH}")

✅ Guardado en: C:\Users\Elisabeth\Desktop\MAESTRIA_AUSTRAL\Labo_III\labo3-2025v\entrega_final\output\pred_modelo_autogluon.csv


In [8]:
resultado.head()

Unnamed: 0,product_id,tn
1,20001,1313.879416
3,20002,1093.024428
5,20003,686.143588
7,20004,515.945564
9,20005,502.793598


##PREDICCION 201912 - CALCULO DE ERRORES

In [13]:
# 1) Filtrar valores reales de diciembre-2019
actual = (
    df_monthly_product
      .query("timestamp == '2019-12-01'")
      .loc[:, ['product_id', 'tn']]
      .rename(columns={'tn': 'tn_real'})
)

# 2) Unir predicción y real
#    'resultado' debe tener columnas ['product_id','tn'] donde 'tn' es la predicha.
df_comp = actual.merge(resultado, on='product_id', how='left')

# 3) (Opcional) Agregar / sumar por product_id si hubiera varias filas
df_agg = (
    df_comp
      .groupby('product_id')[['tn_real', 'tn']]
      .sum()
      .reset_index()
      .rename(columns={'tn': 'tn_pred'})
)

# 4) Calcular errores
df_agg['abs_error'] = (df_agg['tn_real'] - df_agg['tn_pred']).abs()
df_agg['pct_error'] = df_agg['abs_error'] / df_agg['tn_real']

 #Definir ruta de salida (reusa BASE_OUTPUTS si ya la tienes)
ERROR_CSV_PATH = os.path.join(BASE_OUTPUTS, 'error_modelo_autogluon.csv')

# Exportar a CSV
df_agg.to_csv(ERROR_CSV_PATH, index=False)

print(f"✅ Resultados exportados a: {ERROR_CSV_PATH}")
resultado.head()

✅ Resultados exportados a: C:\Users\Elisabeth\Desktop\MAESTRIA_AUSTRAL\Labo_III\labo3-2025v\entrega_final\output\error_modelo_autogluon.csv


Unnamed: 0,product_id,tn
1,20001,1313.879416
3,20002,1093.024428
5,20003,686.143588
7,20004,515.945564
9,20005,502.793598


In [11]:
df_agg.head()

Unnamed: 0,product_id,tn_real,tn,abs_error,pct_error
0,20001,1504.68856,1313.879416,190.809144,0.12681
1,20002,1087.30855,1093.024428,5.715878,0.005257
2,20003,892.50129,686.143588,206.357702,0.231213
3,20004,637.90002,515.945564,121.954456,0.191181
4,20005,593.24443,502.793598,90.450832,0.152468
