In [23]:
# Cell 1: Import all required libraries
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader

In [24]:
# Cell 2: Define the Dataset class
class M5MultiDataset(Dataset):
    """Sliding-window dataset over a tabular sales frame.

    Sample ``i`` pairs the ``window_size`` consecutive rows starting at
    position ``i`` (the inputs) with the ``sales`` value of the row that
    immediately follows that window (the target).

    Args:
        data: pandas DataFrame containing the columns named in
            ``feature_cols`` plus ``item_idx``, ``store_idx`` and ``sales``.
            Rows are consumed positionally (``iloc``) in their existing order.
        feature_cols: names of the columns cast to float32 for the numeric input.
        window_size: number of consecutive rows in each input window.

    NOTE(review): windows are cut purely by row position, so if ``data`` stacks
    several item/store series a window may straddle two of them — confirm the
    caller's row ordering makes that acceptable.
    """

    def __init__(self, data, feature_cols, window_size):
        self.data = data
        self.cols = feature_cols
        self.win = window_size

    def __len__(self):
        """Return the number of complete (window, next-row target) pairs."""
        # A frame with fewer than `win + 1` rows holds no complete sample. Clamp
        # to zero because `len()` raises ValueError on a negative `__len__`.
        return max(len(self.data) - self.win, 0)

    def __getitem__(self, idx):
        """Return ``((X_num, item_idx, store_idx), y)`` for sample ``idx``.

        Returns:
            X_num: float32 tensor of shape (win, num_features).
            item_idx: int64 tensor of shape (win,).
            store_idx: int64 tensor of shape (win,).
            y: 0-dim float32 tensor, the ``sales`` value right after the window.

        Raises:
            IndexError: if ``idx`` is outside ``[-len(self), len(self))``.
        """
        n_samples = len(self)
        # Support Python-style negative indexing; without this a negative idx
        # would silently yield an empty window with an unrelated target.
        if idx < 0:
            idx = idx + n_samples
        if not 0 <= idx < n_samples:
            raise IndexError(
                f'sample index out of range for dataset with {n_samples} samples'
            )

        stop = idx + self.win
        window = self.data.iloc[idx:stop]
        features = window[self.cols].values.astype('float32')  # (win, num_features)
        item_ids = window['item_idx'].values.astype('int64')  # (win,)
        store_ids = window['store_idx'].values.astype('int64')  # (win,)
        # Target is the row immediately after the window (position `stop`).
        target = self.data['sales'].iloc[stop].astype('float32')
        return (
            torch.tensor(features),
            torch.tensor(item_ids),
            torch.tensor(store_ids),
        ), torch.tensor(target)

In [25]:
# Cell 3: Read the preprocessed data
# NOTE(review): the saved output of this cell looks like the tail of a Python
# warning raised by this read (its first line was not preserved). If it is a
# mixed-dtype warning, consider passing an explicit `dtype=` — TODO confirm.
PROCESSED_DATA_PATH = 'processed_data.csv'
df = pd.read_csv(PROCESSED_DATA_PATH)

  df = pd.read_csv('processed_data.csv')


In [26]:
# Cell 4: Define the feature columns
# Order: price and SNAP flags, the three sales lags, the 7-day rolling mean,
# then every column of `df` whose name starts with 'evt_' (in frame order).
FEATURE_COLS = [
    'sell_price', 'snap_CA', 'snap_TX', 'snap_WI',
    *(f'sales_lag_{lag}' for lag in (7, 14, 28)),
    'rolling_mean_7',
    *(name for name in df.columns if name.startswith('evt_')),
]

In [27]:
# Cell 5: Count the distinct items and stores
# NOTE(review): if these counts are used to size embedding tables, they only
# cover every index when the idx values are contiguous from 0 — confirm.
num_items, num_stores = (
    df[id_col].nunique() for id_col in ('item_idx', 'store_idx')
)


In [28]:
# Cell 6: Split into train/validation sets and build the DataLoaders
TRAIN_FRACTION = 0.8  # leading share of rows used for training
WINDOW_SIZE = 28      # rows per input window, shared by both datasets
BATCH_SIZE = 64       # samples per batch, shared by both loaders

# Positional split: the first TRAIN_FRACTION of rows train, the rest validate.
split = int(len(df) * TRAIN_FRACTION)
df_train, df_val = df.iloc[:split], df.iloc[split:]
train_ds = M5MultiDataset(df_train, FEATURE_COLS, window_size=WINDOW_SIZE)
val_ds = M5MultiDataset(df_val, FEATURE_COLS, window_size=WINDOW_SIZE)
# Only the training loader shuffles; validation keeps the dataset order.
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE)

In [29]:
# Cell 7: Save num_items, num_stores and feature_cols for later use
# Writes one "key: value" line per parameter to a plain-text file.
with open('model_params.txt', 'w') as params_file:
    params_file.writelines([
        f'num_items: {num_items}\n',
        f'num_stores: {num_stores}\n',
        f'feature_cols: {FEATURE_COLS}\n',
    ])
print('Saved num_items, num_stores, and feature_cols to model_params.txt')

Saved num_items, num_stores, and feature_cols to model_params.txt
