<a href="https://colab.research.google.com/github/XETI-NEWBIE/Python/blob/main/LSTM%EC%9D%84_%ED%99%9C%EC%9A%A9%ED%95%9C_%EC%8B%9D%EC%9D%8C%EC%97%85%EC%9E%A5_%EB%A9%94%EB%89%B4_%EC%88%98%EC%9A%94_%EC%98%88%EC%B8%A1_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import sys, platform
print("Python:", sys.version)
try:
    import torch
    print("Torch:", torch.__version__)
except Exception as e:
    print("Torch import failed:", e)


Python: 3.12.11 (main, Jun  4 2025, 08:56:18) [GCC 11.4.0]
Torch: 2.8.0+cu126


In [None]:
!pip uninstall -y torch torchvision torchaudio
!pip cache purge
# Python 3.12 호환 버전 설치 (CUDA 포함 휠)
!pip install --upgrade torch==2.4.0 torchvision==0.19.0 torchaudio==2.4.0 --index-url https://download.pytorch.org/whl/cu121


Found existing installation: torch 2.8.0+cu126
Uninstalling torch-2.8.0+cu126:
  Successfully uninstalled torch-2.8.0+cu126
Found existing installation: torchvision 0.23.0+cu126
Uninstalling torchvision-0.23.0+cu126:
  Successfully uninstalled torchvision-0.23.0+cu126
Found existing installation: torchaudio 2.8.0+cu126
Uninstalling torchaudio-2.8.0+cu126:
  Successfully uninstalled torchaudio-2.8.0+cu126
[0mFiles removed: 0
Looking in indexes: https://download.pytorch.org/whl/cu121
Collecting torch==2.4.0
  Downloading https://download.pytorch.org/whl/cu121/torch-2.4.0%2Bcu121-cp312-cp312-linux_x86_64.whl (799.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m799.0/799.0 MB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torchvision==0.19.0
  Downloading https://download.pytorch.org/whl/cu121/torchvision-0.19.0%2Bcu121-cp312-cp312-linux_x86_64.whl (7.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.1/7.1 MB[0m [31m5.8 MB/s[0m et

In [1]:
import torch, torchvision, torchaudio
print("Torch:", torch.__version__, "| CUDA available:", torch.cuda.is_available())

Torch: 2.4.0+cu121 | CUDA available: False


In [51]:
import os
for p in ["/data/train", "/data/test"]:
    os.makedirs(p, exist_ok=True)

# 여러분의 파일을 /content에 두셨다면 다음처럼 옮기거나 복사해 주세요 (예시)
# !cp /content/train.csv /data/train/train.csv
# !cp /content/sample_submission.csv /data/sample_submission.csv
# !cp /content/TEST_*.csv /data/test/

In [52]:
# -------------------------
# Imports & Device
# -------------------------
import os, re, glob, math, random, warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

if 'DEVICE' not in globals():
    DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("DEVICE:", DEVICE, "| CUDA available:", torch.cuda.is_available())

DEVICE: cpu | CUDA available: False


In [53]:
# -------------------------
# Config
# -------------------------
DATA_DIR   = "/data"
TRAIN_PATH = f"{DATA_DIR}/train/train.csv"
TEST_GLOB  = f"{DATA_DIR}/test/TEST_*.csv"
SAMPLE_SUB = f"{DATA_DIR}/sample_submission.csv"

LOOKBACK = 28
HORIZON  = 7

BATCH_SIZE = 256
EPOCHS     = 35
LR         = 1e-3        # 안정 수렴을 위해 살짝 낮춤
PATIENCE   = 6

# 손실 혼합 가중치 (SMAPE에 더 무게)
ALPHA_SM   = 0.7         # 최종 loss = ALPHA_SM*SMAPE + (1-ALPHA_SM)*MSE

# 업장 가중치(담하, 미라시아 강화) - 과도하면 흔들릴 수 있으니 적당히
IMPORTANT_STORES = {"담하", "미라시아"}
IMPORTANT_WEIGHT = 1.5
DEFAULT_WEIGHT   = 1.0

# 재현성
def set_seed(seed: int = 42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
set_seed(42)


In [54]:
# -------------------------
# Utils
# -------------------------
def parse_shop(name: str) -> str:
    if isinstance(name, str):
        return name.split("_", 1)[0]
    return str(name)

def _clean_counts(v):
    v = np.asarray(v, dtype=float)
    v = np.where(np.isfinite(v), v, 0.0)
    v = np.maximum(v, 0.0)
    return v

def smape_numpy(y_true, y_pred, eps=1e-5):
    y_true = np.asarray(y_true); y_pred = np.asarray(y_pred)
    mask = (y_true != 0)
    if mask.sum() == 0:
        return 0.0
    num = 2.0 * np.abs(y_true[mask] - y_pred[mask])
    den = np.abs(y_true[mask]) + np.abs(y_pred[mask]) + eps
    return float(np.mean(num / den))

def smape_torch_weighted(y_true, y_pred, w, eps=1e-5):
    # y_true,y_pred: (B,HZ) - ORIGINAL scale / w:(B,)
    y_true = torch.nan_to_num(y_true, nan=0.0, posinf=0.0, neginf=0.0).clamp_min(0.0)
    y_pred = torch.nan_to_num(y_pred, nan=0.0, posinf=0.0, neginf=0.0).clamp_min(0.0)
    mask = (y_true != 0.0).float()
    num = 2.0 * torch.abs(y_true - y_pred)
    den = torch.abs(y_true) + torch.abs(y_pred) + eps
    sm  = (num / den) * mask                       # (B,HZ)
    per_sample = sm.sum(dim=1) / torch.clamp(mask.sum(dim=1), min=1.0)  # (B,)
    w = w.view(-1)
    return (per_sample * w).sum() / torch.clamp(w.sum(), min=1.0)


In [55]:
# -------------------------
# Weighted SMAPE (store-importance aware)
# -------------------------
def smape_torch_weighted(y_true, y_pred, w, eps=1e-5):
    """
    y_true, y_pred: (B, HZ) on ORIGINAL scale
    w: (B,) sample weight (e.g., 담하/미라시아 2.0, others 1.0)
    - 실제=0인 타임스텝은 제외
    - 각 샘플(HZ 평균) 후 샘플 가중 평균
    """
    # 보호: NaN/Inf 제거 및 음수 제거
    y_true = torch.nan_to_num(y_true, nan=0.0, posinf=0.0, neginf=0.0).clamp_min(0.0)
    y_pred = torch.nan_to_num(y_pred, nan=0.0, posinf=0.0, neginf=0.0).clamp_min(0.0)

    mask = (y_true != 0.0).float()  # (B, HZ)
    num = 2.0 * torch.abs(y_true - y_pred)
    den = torch.abs(y_true) + torch.abs(y_pred) + eps
    sm = (num / den) * mask  # (B, HZ)

    per_sample = sm.sum(dim=1) / torch.clamp(mask.sum(dim=1), min=1.0)  # (B,)
    w = w.view(-1)  # (B,)
    return (per_sample * w).sum() / torch.clamp(w.sum(), min=1.0)


In [56]:
# -------------------------
# Data Preparation (확장 피처)
# -------------------------
# 1) train 로드 & 기본 전처리
train = pd.read_csv(TRAIN_PATH)
train["영업일자"] = pd.to_datetime(train["영업일자"])
train["영업장명_메뉴명"] = train["영업장명_메뉴명"].astype(str)
train["업장명"] = train["영업장명_메뉴명"].apply(parse_shop)
train = train.sort_values(["영업장명_메뉴명", "영업일자"]).reset_index(drop=True)

# 2) vocab (UNK=0)
items = sorted(train["영업장명_메뉴명"].unique().tolist())
shops = sorted(train["업장명"].unique().tolist())
item2id = {it: i+1 for i, it in enumerate(items)}
shop2id = {sp: i+1 for i, sp in enumerate(shops)}
UNK_ITEM, UNK_SHOP = 0, 0
N_ITEMS = len(item2id) + 1
N_SHOPS = len(shop2id) + 1

# 3) 윈도우 생성
def build_windows_ext(df: pd.DataFrame, lookback=LOOKBACK, horizon=HORIZON):
    X, Xrm7, Xrm28 = [], [], []
    Xdow, Xmon, Xwoy, Xdoy = [], [], [], []
    Fdow, Fmon, Fwoy, Fdoy = [], [], [], []
    Y = []
    item_list, shop_list, wt_list, scale_list, date_key_list = [], [], [], [], []

    for (itm), g in df.groupby("영업장명_메뉴명"):
        g = g.sort_values("영업일자")
        vals  = _clean_counts(g["매출수량"].values)
        dates = pd.to_datetime(g["영업일자"]).values

        shop_name = g["업장명"].iloc[0]
        sid  = shop2id.get(shop_name, UNK_SHOP)
        iid  = item2id.get(itm, UNK_ITEM)
        w    = IMPORTANT_WEIGHT if shop_name in IMPORTANT_STORES else DEFAULT_WEIGHT

        # 전체 시리즈의 날짜 피처(정수)
        dates_pd = pd.to_datetime(g["영업일자"])
        d_dow = dates_pd.dt.dayofweek.values.astype(np.int64)          # 0..6
        d_mon = dates_pd.dt.month.values.astype(np.int64)              # 1..12
        d_woy = dates_pd.dt.isocalendar().week.values.astype(np.int64) # 1..53(54 가능성 대비)
        d_doy = dates_pd.dt.dayofyear.values.astype(np.int64)          # 1..365(윤년 366)

        # rolling (전구간)
        s = pd.Series(vals)
        r7  = s.rolling(7,  min_periods=1).mean().values
        r28 = s.rolling(28, min_periods=1).mean().values

        for t in range(lookback, len(vals) - horizon + 1):
            past_idx = slice(t - lookback, t)
            fut_idx  = slice(t, t + horizon)

            x_seq  = vals[past_idx]
            x_rm7  = r7[past_idx]
            x_rm28 = r28[past_idx]

            x_dow = d_dow[past_idx]
            x_mon = d_mon[past_idx]
            x_woy = d_woy[past_idx]
            x_doy = d_doy[past_idx]

            y_seq = vals[fut_idx]

            # scale (log1p mean of past window)
            x_log    = np.log1p(x_seq)
            rm7_log  = np.log1p(np.maximum(x_rm7,  0.0))
            rm28_log = np.log1p(np.maximum(x_rm28, 0.0))
            y_log    = np.log1p(y_seq)

            scale = float(np.nanmean(x_log))
            if not np.isfinite(scale) or abs(scale) < 1e-8:
                scale = 1.0

            X.append((x_log/scale).astype(np.float32))
            Xrm7.append((rm7_log/scale).astype(np.float32))
            Xrm28.append((rm28_log/scale).astype(np.float32))
            Y.append((y_log/scale).astype(np.float32))

            # future features from last observed date
            last_date = pd.to_datetime(dates[past_idx][-1])
            fut_dates = [last_date + pd.Timedelta(days=k) for k in range(1, horizon+1)]
            Fdow.append(np.array([d.dayofweek          for d in fut_dates], dtype=np.int64))
            Fmon.append(np.array([d.month              for d in fut_dates], dtype=np.int64))
            Fwoy.append(np.array([d.isocalendar().week for d in fut_dates], dtype=np.int64))
            Fdoy.append(np.array([d.dayofyear          for d in fut_dates], dtype=np.int64))

            Xdow.append(x_dow.astype(np.int64))
            Xmon.append(x_mon.astype(np.int64))
            Xwoy.append(x_woy.astype(np.int64))
            Xdoy.append(x_doy.astype(np.int64))

            item_list.append(iid)
            shop_list.append(sid)
            wt_list.append(w)
            scale_list.append(scale)
            date_key_list.append(pd.to_datetime(dates[t]).to_datetime64())

    to_np = lambda L, shape, dtype: np.stack(L) if len(L) else np.empty(shape, dtype)
    data = {
        "X":     to_np(X,     (0, lookback), np.float32),
        "Xrm7":  to_np(Xrm7,  (0, lookback), np.float32),
        "Xrm28": to_np(Xrm28, (0, lookback), np.float32),

        "Xdow":  to_np(Xdow,  (0, lookback), np.int64),
        "Xmon":  to_np(Xmon,  (0, lookback), np.int64),
        "Xwoy":  to_np(Xwoy,  (0, lookback), np.int64),
        "Xdoy":  to_np(Xdoy,  (0, lookback), np.int64),

        "Fdow":  to_np(Fdow,  (0, horizon),  np.int64),
        "Fmon":  to_np(Fmon,  (0, horizon),  np.int64),
        "Fwoy":  to_np(Fwoy,  (0, horizon),  np.int64),
        "Fdoy":  to_np(Fdoy,  (0, horizon),  np.int64),

        "Y":     to_np(Y,     (0, horizon),  np.float32),

        "item":  np.array(item_list,  dtype=np.int64),
        "shop":  np.array(shop_list,  dtype=np.int64),
        "w":     np.array(wt_list,    dtype=np.float32),
        "scale": np.array(scale_list, dtype=np.float32).reshape(-1, 1),
        "date_key": np.array(date_key_list),
    }
    return data

win = build_windows_ext(train, LOOKBACK, HORIZON)
print("windows:", {k: v.shape for k, v in win.items() if isinstance(v, np.ndarray)})


windows: {'X': (96114, 28), 'Xrm7': (96114, 28), 'Xrm28': (96114, 28), 'Xdow': (96114, 28), 'Xmon': (96114, 28), 'Xwoy': (96114, 28), 'Xdoy': (96114, 28), 'Fdow': (96114, 7), 'Fmon': (96114, 7), 'Fwoy': (96114, 7), 'Fdoy': (96114, 7), 'Y': (96114, 7), 'item': (96114,), 'shop': (96114,), 'w': (96114,), 'scale': (96114, 1), 'date_key': (96114,)}


In [57]:
# -------------------------
# Train/Valid Split (time-based)
# -------------------------
if len(win["date_key"]) == 0:
    raise RuntimeError("No training windows were generated. Check data ranges.")

threshold_date = pd.to_datetime(np.quantile(win["date_key"], 0.90))
train_idx = (pd.to_datetime(win["date_key"]) < threshold_date).nonzero()[0]
valid_idx = (pd.to_datetime(win["date_key"]) >= threshold_date).nonzero()[0]

def subset(data, idx):
    sub = {}
    for k, v in data.items():
        sub[k] = v[idx] if isinstance(v, np.ndarray) else v
    return sub

train_data = subset(win, train_idx)
valid_data = subset(win, valid_idx)

In [58]:
# -------------------------
# Dataset / DataLoader (확장 피처 16개 반환)
# -------------------------
class WindowDataset(Dataset):
    def __init__(self, data):
        self.X     = data["X"]
        self.Xrm7  = data["Xrm7"]
        self.Xrm28 = data["Xrm28"]

        self.Xdow  = data["Xdow"]
        self.Xmon  = data["Xmon"]
        self.Xwoy  = data["Xwoy"]
        self.Xdoy  = data["Xdoy"]

        self.Fdow  = data["Fdow"]
        self.Fmon  = data["Fmon"]
        self.Fwoy  = data["Fwoy"]
        self.Fdoy  = data["Fdoy"]

        self.Y     = data["Y"]
        self.item  = data["item"]
        self.shop  = data["shop"]
        self.w     = data["w"]
        self.scale = data["scale"]

    def __len__(self):
        return len(self.X)

    def __getitem__(self, i):
        return (
            torch.from_numpy(self.X[i]).float(),       # (LB,)
            torch.from_numpy(self.Xrm7[i]).float(),    # (LB,)
            torch.from_numpy(self.Xrm28[i]).float(),   # (LB,)

            torch.from_numpy(self.Xdow[i]).long(),     # (LB,)
            torch.from_numpy(self.Xmon[i]).long(),     # (LB,)
            torch.from_numpy(self.Xwoy[i]).long(),     # (LB,)
            torch.from_numpy(self.Xdoy[i]).long(),     # (LB,)

            torch.from_numpy(self.Y[i]).float(),       # (HZ,)
            torch.tensor(self.item[i]).long(),         # ()
            torch.tensor(self.shop[i]).long(),         # ()
            torch.tensor(self.w[i]).float(),           # ()
            torch.from_numpy(self.scale[i]).float(),   # (1,)

            torch.from_numpy(self.Fdow[i]).long(),     # (HZ,)
            torch.from_numpy(self.Fmon[i]).long(),     # (HZ,)
            torch.from_numpy(self.Fwoy[i]).long(),     # (HZ,)
            torch.from_numpy(self.Fdoy[i]).long(),     # (HZ,)
        )

num_workers = 0 if DEVICE.type == "cpu" else 2
pin_mem     = (DEVICE.type == "cuda")

train_loader = DataLoader(WindowDataset(train_data), batch_size=BATCH_SIZE, shuffle=True,
                          drop_last=False, num_workers=num_workers, pin_memory=pin_mem)
valid_loader = DataLoader(WindowDataset(valid_data), batch_size=BATCH_SIZE, shuffle=False,
                          drop_last=False, num_workers=num_workers, pin_memory=pin_mem)


In [59]:
# -------------------------
# Model (GRU + robust indexing)
# -------------------------
class GlobalGRUForecaster(nn.Module):
    def __init__(self,
                 n_items, n_shops,
                 item_emb_dim=32, shop_emb_dim=8,
                 dow_emb_dim=4, mon_emb_dim=4, woy_emb_dim=6, doy_emb_dim=8,
                 horizon_emb_dim=8,
                 hidden_dim=128, num_layers=2, horizon=7, dropout=0.15):
        super().__init__()
        self.horizon = horizon

        self.item_emb = nn.Embedding(n_items, item_emb_dim)    # 0..n_items-1 (0=UNK)
        self.shop_emb = nn.Embedding(n_shops, shop_emb_dim)    # 0..n_shops-1 (0=UNK)
        self.dow_emb  = nn.Embedding(7,   dow_emb_dim)         # 0..6
        self.mon_emb  = nn.Embedding(13,  mon_emb_dim)         # 0..12
        self.woy_emb  = nn.Embedding(55,  woy_emb_dim)         # 0..54
        self.doy_emb  = nn.Embedding(367, doy_emb_dim)         # 0..366
        self.h_emb    = nn.Embedding(horizon, horizon_emb_dim)

        in_dim = (3
                  + dow_emb_dim + mon_emb_dim + woy_emb_dim + doy_emb_dim
                  + item_emb_dim + shop_emb_dim)

        self.encoder = nn.GRU(
            input_size=in_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout if num_layers > 1 else 0.0
        )

        dec_in = hidden_dim + item_emb_dim + shop_emb_dim + \
                 dow_emb_dim + mon_emb_dim + woy_emb_dim + doy_emb_dim + horizon_emb_dim

        self.head = nn.Sequential(
            nn.Linear(dec_in, 256),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 1)
        )

    @staticmethod
    def _ensure_1d_ids(x: torch.Tensor, B: int):
        if not torch.is_tensor(x):
            x = torch.tensor(x)
        if x.dim() == 0:
            x = x.view(1).repeat(B)
        elif x.dim() >= 2:
            x = x.view(B, -1)[:, 0]
        elif x.size(0) != B:
            x = x[:1].repeat(B)
        return x.long()

    @staticmethod
    def _clamp_like_emb(x: torch.Tensor, emb: nn.Embedding):
        hi = emb.num_embeddings - 1
        return x.long().clamp(min=0, max=hi)

    def forward(self,
                x_seq, x_rm7, x_rm28,           # (B,LB)
                x_dow, x_mon, x_woy, x_doy,     # (B,LB)
                item_id, shop_id,               # (B,)
                f_dow, f_mon, f_woy, f_doy):    # (B,HZ)
        B, LB = x_seq.size(0), x_seq.size(1)
        HZ = self.horizon

        # 안전 가드
        item_id = self._ensure_1d_ids(item_id, B)
        shop_id = self._ensure_1d_ids(shop_id, B)

        x_dow = self._clamp_like_emb(x_dow, self.dow_emb)
        x_mon = self._clamp_like_emb(x_mon, self.mon_emb)
        x_woy = self._clamp_like_emb(x_woy, self.woy_emb)
        x_doy = self._clamp_like_emb(x_doy, self.doy_emb)
        f_dow = self._clamp_like_emb(f_dow, self.dow_emb)
        f_mon = self._clamp_like_emb(f_mon, self.mon_emb)
        f_woy = self._clamp_like_emb(f_woy, self.woy_emb)
        f_doy = self._clamp_like_emb(f_doy, self.doy_emb)
        item_id = self._clamp_like_emb(item_id, self.item_emb)
        shop_id = self._clamp_like_emb(shop_id, self.shop_emb)

        # 임베딩
        item_e = self.item_emb(item_id)   # (B,Ei) 혹은 (Ei,)
        shop_e = self.shop_emb(shop_id)   # (B,Es) 혹은 (Es,)

        if item_e.dim() == 1: item_e = item_e.unsqueeze(0).expand(B, -1)
        if shop_e.dim() == 1: shop_e = shop_e.unsqueeze(0).expand(B, -1)

        x_dow_e = self.dow_emb(x_dow)    # (B,LB,Ed)
        x_mon_e = self.mon_emb(x_mon)
        x_woy_e = self.woy_emb(x_woy)
        x_doy_e = self.doy_emb(x_doy)

        # 수치 채널
        x_seq  = x_seq.unsqueeze(-1)     # (B,LB,1)
        x_rm7  = x_rm7.unsqueeze(-1)
        x_rm28 = x_rm28.unsqueeze(-1)

        # item/shop 반복
        item_rep = item_e.unsqueeze(1).repeat(1, LB, 1)
        shop_rep = shop_e.unsqueeze(1).repeat(1, LB, 1)

        # 인코더 입력
        enc_in = torch.cat([
            x_seq, x_rm7, x_rm28,
            x_dow_e, x_mon_e, x_woy_e, x_doy_e,
            item_rep, shop_rep
        ], dim=-1)  # (B,LB,in_dim)

        enc_out, _ = self.encoder(enc_in)   # (B,LB,H)
        context = enc_out[:, -1, :]         # (B,H)

        # 디코더 입력
        pos   = torch.arange(HZ, device=enc_in.device).unsqueeze(0).expand(B, HZ)
        pos_e = self.h_emb(pos)
        f_dow_e = self.dow_emb(f_dow)
        f_mon_e = self.mon_emb(f_mon)
        f_woy_e = self.woy_emb(f_woy)
        f_doy_e = self.doy_emb(f_doy)

        ctx_rep   = context.unsqueeze(1).repeat(1, HZ, 1)
        item_rep2 = item_e.unsqueeze(1).repeat(1, HZ, 1)
        shop_rep2 = shop_e.unsqueeze(1).repeat(1, HZ, 1)

        dec_in = torch.cat([ctx_rep, item_rep2, shop_rep2,
                            f_dow_e, f_mon_e, f_woy_e, f_doy_e, pos_e], dim=-1)
        out = self.head(dec_in).squeeze(-1)  # (B,HZ)
        return out

# 모델/옵티마/스케줄러
model = GlobalGRUForecaster(
    n_items=N_ITEMS, n_shops=N_SHOPS,
    item_emb_dim=32, shop_emb_dim=8,
    dow_emb_dim=4, mon_emb_dim=4, woy_emb_dim=6, doy_emb_dim=8,
    horizon_emb_dim=8, hidden_dim=128, num_layers=2,
    horizon=HORIZON, dropout=0.15
).to(DEVICE)

optimizer = torch.optim.Adam(model.parameters(), lr=LR)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS, eta_min=1e-6)
mse_loss  = nn.MSELoss(reduction='none')  # 샘플 가중 적용 위해


In [60]:
# 빠른 점검: 한 배치 뽑아서 인덱스 범위 확인
b = next(iter(train_loader))
print("len(batch) =", len(b))
# 여기서 다시 최소/최대 찍어봐도 되고, 이제부터는 forward에서 강제로 정리하니 학습이 진행될 거예요.

if len(b) == 16:
    (xb, xrm7b, xrm28b,
     xdw, xmn, xwy, xdy,
     yb, iid, sid, wb, sc,
     fdw, fmn, fwy, fdy) = b

    print("item_emb size:", model.item_emb.num_embeddings, "min/max iid:", iid.min().item(), iid.max().item())
    print("shop_emb size:", model.shop_emb.num_embeddings, "min/max sid:", sid.min().item(), sid.max().item())
    print("dow_emb  size:", model.dow_emb.num_embeddings,  "min/max xdw:", xdw.min().item(), xdw.max().item(),
          "min/max fdw:", fdw.min().item(), fdw.max().item())
    print("mon_emb  size:", model.mon_emb.num_embeddings,  "min/max xmn:", xmn.min().item(), xmn.max().item(),
          "min/max fmn:", fmn.min().item(), fmn.max().item())
    print("woy_emb  size:", model.woy_emb.num_embeddings,  "min/max xwy:", xwy.min().item(), xwy.max().item(),
          "min/max fwy:", fwy.min().item(), fwy.max().item())
    print("doy_emb  size:", model.doy_emb.num_embeddings,  "min/max xdy:", xdy.min().item(), xdy.max().item(),
          "min/max fdy:", fdy.min().item(), fdy.max().item())
elif len(b) == 8:
    xb, xdw, fdw, yb, iid, sid, wb, sc = b
    print("item_emb size:", model.item_emb.num_embeddings, "min/max iid:", iid.min().item(), iid.max().item())
    print("shop_emb size:", model.shop_emb.num_embeddings, "min/max sid:", sid.min().item(), sid.max().item())
    print("dow_emb  size:", model.dow_emb.num_embeddings,  "min/max xdw:", xdw.min().item(), xdw.max().item(),
          "min/max fdw:", fdw.min().item(), fdw.max().item())
else:
    print("Unexpected batch length:", len(b))


len(batch) = 16
item_emb size: 194 min/max iid: 1 193
shop_emb size: 10 min/max sid: 1 9
dow_emb  size: 7 min/max xdw: 0 6 min/max fdw: 0 6
mon_emb  size: 13 min/max xmn: 1 12 min/max fmn: 1 12
woy_emb  size: 55 min/max xwy: 1 52 min/max fwy: 1 52
doy_emb  size: 367 min/max xdy: 1 365 min/max fdy: 1 365


In [61]:
# -------------------------
# Training / Validation (안정 가드 포함)
# -------------------------
def _clamp_idx_like_module(x, emb):
    hi = emb.num_embeddings - 1
    return x.long().clamp(min=0, max=hi)

def _ensure_1d_ids_lenB(x, B):
    if not torch.is_tensor(x):
        x = torch.tensor(x)
    if x.dim() == 0:
        x = x.view(1).repeat(B)
    elif x.dim() >= 2:
        x = x.view(B, -1)[:, 0]
    elif x.size(0) != B:
        x = x[:1].repeat(B)
    return x.long()

def run_epoch(loader, is_train=True):
    model.train() if is_train else model.eval()
    total_loss, total_smape, total_n = 0.0, 0.0, 0

    for batch in loader:
        (xb, xrm7b, xrm28b,
         xdw, xmn, xwy, xdy,
         yb, iid, sid, wb, sc,
         fdw, fmn, fwy, fdy) = batch

        B = xb.size(0)
        xb, xrm7b, xrm28b = xb.to(DEVICE), xrm7b.to(DEVICE), xrm28b.to(DEVICE)
        yb  = yb.to(DEVICE)
        wb  = wb.to(DEVICE)
        sc  = sc.to(DEVICE)

        iid = _ensure_1d_ids_lenB(iid.to(DEVICE), B)
        sid = _ensure_1d_ids_lenB(sid.to(DEVICE), B)

        xdw = _clamp_idx_like_module(xdw.to(DEVICE), model.dow_emb)
        xmn = _clamp_idx_like_module(xmn.to(DEVICE), model.mon_emb)
        xwy = _clamp_idx_like_module(xwy.to(DEVICE), model.woy_emb)
        xdy = _clamp_idx_like_module(xdy.to(DEVICE), model.doy_emb)
        fdw = _clamp_idx_like_module(fdw.to(DEVICE), model.dow_emb)
        fmn = _clamp_idx_like_module(fmn.to(DEVICE), model.mon_emb)
        fwy = _clamp_idx_like_module(fwy.to(DEVICE), model.woy_emb)
        fdy = _clamp_idx_like_module(fdy.to(DEVICE), model.doy_emb)

        with torch.set_grad_enabled(is_train):
            pred_scaled = model(
                xb, xrm7b, xrm28b,
                xdw, xmn, xwy, xdy,
                iid, sid,
                fdw, fmn, fwy, fdy
            )  # (B,HZ)

            # 안정화: 스케일 공간에서 gradient 폭주 방지
            pred_scaled = torch.clamp(pred_scaled, -20.0, 20.0)
            yb          = torch.clamp(yb,         -20.0, 20.0)

            # MSE (scaled space) → 샘플별 평균 → 가중 평균
            mse_per_t = mse_loss(pred_scaled, yb).mean(dim=1)   # (B,)
            loss_mse_weighted = (mse_per_t * wb).mean()

            # 원 스케일 복원
            pred_orig = torch.exp(pred_scaled * sc) - 1.0
            true_orig = torch.exp(yb          * sc) - 1.0
            pred_orig = torch.nan_to_num(pred_orig, nan=0.0, posinf=0.0, neginf=0.0).clamp_min(0.0)
            true_orig = torch.nan_to_num(true_orig, nan=0.0, posinf=0.0, neginf=0.0).clamp_min(0.0)

            # Weighted SMAPE
            loss_smape_weighted = smape_torch_weighted(true_orig, pred_orig, wb)

            # 최종 손실 (SMAPE에 더 가중)
            loss = ALPHA_SM * loss_smape_weighted + (1.0 - ALPHA_SM) * loss_mse_weighted

            if is_train:
                optimizer.zero_grad()
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
                optimizer.step()

        total_loss  += float(loss.item()) * B
        total_smape += float(loss_smape_weighted.item()) * B
        total_n     += B

    return total_loss/total_n, total_smape/total_n

best_val = float('inf')
pat_cnt  = 0
best_state = None

for epoch in range(1, EPOCHS+1):
    tr_loss, tr_smape = run_epoch(train_loader, is_train=True)
    va_loss, va_smape = run_epoch(valid_loader, is_train=False)
    scheduler.step()

    print(f"[{epoch:02d}] train loss={tr_loss:.5f} smape={tr_smape:.5f} | "
          f"valid loss={va_loss:.5f} smape={va_smape:.5f}")

    if va_smape < best_val:
        best_val = va_smape
        best_state = {k: v.detach().cpu().clone() for k, v in model.state_dict().items()}
        pat_cnt = 0
    else:
        pat_cnt += 1
        if pat_cnt >= PATIENCE:
            print("Early stopping.")
            break

if best_state is not None:
    model.load_state_dict(best_state)
model.eval()


[01] train loss=1.39533 smape=0.59882 | valid loss=1.12156 smape=0.66794
[02] train loss=1.16446 smape=0.53441 | valid loss=1.09291 smape=0.68835
[03] train loss=1.03209 smape=0.50210 | valid loss=1.09451 smape=0.67697
[04] train loss=0.93001 smape=0.47950 | valid loss=1.09435 smape=0.64529
[05] train loss=0.85218 smape=0.46263 | valid loss=1.10844 smape=0.64889
[06] train loss=0.78383 smape=0.44719 | valid loss=1.11342 smape=0.64749
[07] train loss=0.73089 smape=0.43374 | valid loss=1.10344 smape=0.64813
[08] train loss=0.68032 smape=0.42001 | valid loss=1.19251 smape=0.67314
[09] train loss=0.64244 smape=0.40775 | valid loss=1.19124 smape=0.65761
[10] train loss=0.60547 smape=0.39604 | valid loss=1.17226 smape=0.67485
Early stopping.


GlobalGRUForecaster(
  (item_emb): Embedding(194, 32)
  (shop_emb): Embedding(10, 8)
  (dow_emb): Embedding(7, 4)
  (mon_emb): Embedding(13, 4)
  (woy_emb): Embedding(55, 6)
  (doy_emb): Embedding(367, 8)
  (h_emb): Embedding(7, 8)
  (encoder): GRU(65, 128, num_layers=2, batch_first=True, dropout=0.15)
  (head): Sequential(
    (0): Linear(in_features=198, out_features=256, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.15, inplace=False)
    (3): Linear(in_features=256, out_features=128, bias=True)
    (4): ReLU()
    (5): Linear(in_features=128, out_features=1, bias=True)
  )
)

In [63]:
# -------------------------
# Inference on TEST_*.csv (STRICT: 28일 입력)
# -------------------------
def predict_file(test_path: str):
    df = pd.read_csv(test_path)
    df["영업일자"] = pd.to_datetime(df["영업일자"])
    df["영업장명_메뉴명"] = df["영업장명_메뉴명"].astype(str)
    df["업장명"] = df["영업장명_메뉴명"].apply(parse_shop)
    df = df.sort_values(["영업장명_메뉴명", "영업일자"]).reset_index(drop=True)

    rows = []
    for it, g in df.groupby("영업장명_메뉴명"):
        g = g.sort_values("영업일자")
        if len(g) < LOOKBACK:
            continue

        vals = _clean_counts(g["매출수량"].values[-LOOKBACK:])
        dts  = pd.to_datetime(g["영업일자"].values[-LOOKBACK:])
        x_dow = dts.dayofweek.values.astype(np.int64)
        x_mon = dts.month.values.astype(np.int64)
        x_woy = dts.isocalendar().week.values.astype(np.int64)
        x_doy = dts.dayofyear.values.astype(np.int64)

        s = pd.Series(vals)
        x_rm7  = s.rolling(7,  min_periods=1).mean().values
        x_rm28 = s.rolling(28, min_periods=1).mean().values

        iid = item2id.get(it, UNK_ITEM)
        shop = parse_shop(it)
        sid  = shop2id.get(shop, UNK_SHOP)

        x_log    = np.log1p(vals)
        rm7_log  = np.log1p(np.maximum(x_rm7,  0.0))
        rm28_log = np.log1p(np.maximum(x_rm28, 0.0))
        scale = float(np.nanmean(x_log))
        if not np.isfinite(scale) or abs(scale) < 1e-8:
            scale = 1.0

        x     = (x_log   / scale).astype(np.float32)
        xrm7  = (rm7_log / scale).astype(np.float32)
        xrm28 = (rm28_log/ scale).astype(np.float32)

        last_date = pd.to_datetime(dts[-1])
        fut_dates = [last_date + pd.Timedelta(days=k) for k in range(1, HORIZON+1)]
        f_dow = np.array([d.dayofweek          for d in fut_dates], dtype=np.int64)
        f_mon = np.array([d.month              for d in fut_dates], dtype=np.int64)
        f_woy = np.array([d.isocalendar().week for d in fut_dates], dtype=np.int64)
        f_doy = np.array([d.dayofyear          for d in fut_dates], dtype=np.int64)

        xb     = torch.from_numpy(x[None, :]).to(DEVICE)
        xrm7b  = torch.from_numpy(xrm7[None, :]).to(DEVICE)
        xrm28b = torch.from_numpy(xrm28[None, :]).to(DEVICE)

        xdw = torch.from_numpy(x_dow[None, :]).long().to(DEVICE)
        xmn = torch.from_numpy(x_mon[None, :]).long().to(DEVICE)
        xwy = torch.from_numpy(x_woy[None, :]).long().to(DEVICE)
        xdy = torch.from_numpy(x_doy[None, :]).long().to(DEVICE)

        fdw = torch.from_numpy(f_dow[None, :]).long().to(DEVICE)
        fmn = torch.from_numpy(f_mon[None, :]).long().to(DEVICE)
        fwy = torch.from_numpy(f_woy[None, :]).long().to(DEVICE)
        fdy = torch.from_numpy(f_doy[None, :]).long().to(DEVICE)

        iid_t = torch.tensor([iid], dtype=torch.long, device=DEVICE)
        sid_t = torch.tensor([sid], dtype=torch.long, device=DEVICE)

        with torch.no_grad():
            pred_scaled = model(
                xb, xrm7b, xrm28b,
                xdw, xmn, xwy, xdy,
                iid_t, sid_t,
                fdw, fmn, fwy, fdy
            )
            pred_scaled = torch.clamp(pred_scaled, -20.0, 20.0)
            pred_orig   = torch.exp(pred_scaled * torch.tensor([[scale]], device=DEVICE)) - 1.0
            pred_orig   = torch.clamp(pred_orig, min=0.0).squeeze(0).cpu().numpy()

        rows.append({"영업장명_메뉴명": it, "업장명": shop, "preds": pred_orig})

    filename = os.path.basename(test_path)
    prefix = re.search(r"(TEST_\d+)", filename).group(1)
    out_rows = []
    for r in rows:
        for i, v in enumerate(r["preds"], start=1):
            out_rows.append({
                "영업일자": f"{prefix}+{i}일",
                "영업장명_메뉴명": r["영업장명_메뉴명"],
                "매출수량": float(v)
            })
    return pd.DataFrame(out_rows)

# 모든 TEST 파일 예측 & 합치기
test_paths = sorted(glob.glob(TEST_GLOB))
print("TEST files:", len(test_paths), test_paths[:3], "...")
if len(test_paths) == 0:
    raise RuntimeError("TEST_*.csv가 없습니다. /data/test/ 하위 확인")

all_preds = []
for p in test_paths:
    df_pred = predict_file(p)
    if len(df_pred) == 0:
        print(f"경고: {os.path.basename(p)} 예측 결과가 비었습니다. (28일 미만 샘플?)")
    all_preds.append(df_pred)

full_pred_df = pd.concat(all_preds, ignore_index=True)
print("full_pred_df shape:", full_pred_df.shape)
# display(full_pred_df.head())  # Colab이면 주석 해제해 확인 가능


TEST files: 10 ['/data/test/TEST_00.csv', '/data/test/TEST_01.csv', '/data/test/TEST_02.csv'] ...
full_pred_df shape: (13510, 3)


In [64]:
# -------------------------
# 모든 TEST 파일 예측 & 합치기
# -------------------------
import glob, os

TEST_GLOB = "/data/test/TEST_*.csv"
test_paths = sorted(glob.glob(TEST_GLOB))
print("TEST files:", len(test_paths), test_paths[:3], " ...")

if len(test_paths) == 0:
    raise RuntimeError("TEST_*.csv 파일을 찾지 못했습니다. /data/test/ 아래에 TEST_00.csv ~ TEST_09.csv 확인")

model.eval()

all_preds = []
for p in test_paths:
    df_pred = predict_file(p)
    if len(df_pred) == 0:
        print(f"경고: {os.path.basename(p)} 예측 결과가 비었습니다. (28일 미만 샘플?)")
    all_preds.append(df_pred)

full_pred_df = pd.concat(all_preds, ignore_index=True)
print("full_pred_df shape:", full_pred_df.shape)
display(full_pred_df.head())


TEST files: 10 ['/data/test/TEST_00.csv', '/data/test/TEST_01.csv', '/data/test/TEST_02.csv']  ...
full_pred_df shape: (13510, 3)


Unnamed: 0,영업일자,영업장명_메뉴명,매출수량
0,TEST_00+1일,느티나무 셀프BBQ_1인 수저세트,7.727566
1,TEST_00+2일,느티나무 셀프BBQ_1인 수저세트,2.126642
2,TEST_00+3일,느티나무 셀프BBQ_1인 수저세트,3.101673
3,TEST_00+4일,느티나무 셀프BBQ_1인 수저세트,2.916096
4,TEST_00+5일,느티나무 셀프BBQ_1인 수저세트,4.198502


In [65]:
# -------------------------
# Convert to Submission Format
# -------------------------
def convert_to_submission(pred_df: pd.DataFrame, sample_df: pd.DataFrame):
    pred_key = dict(zip(
        zip(pred_df["영업일자"], pred_df["영업장명_메뉴명"]),
        pred_df["매출수량"]
    ))
    sub = sample_df.copy()
    for r in sub.index:
        d = sub.loc[r, "영업일자"]
        for c in sub.columns[1:]:
            sub.loc[r, c] = pred_key.get((d, c), 0.0)
    return sub

sample = pd.read_csv(SAMPLE_SUB) if os.path.exists(SAMPLE_SUB) \
         else pd.read_csv("/data/sample_submission/sample_submission.csv")
submission = convert_to_submission(full_pred_df, sample)

OUT_PATH = "/data/submission_global_gru.csv"
submission.to_csv(OUT_PATH, index=False, encoding="utf-8-sig")
print(f"Saved submission -> {OUT_PATH}")

Saved submission -> /data/submission_global_gru.csv


In [67]:
# -------------------------
# (Optional) Offline validation (확장 피처용)  ← unpack 16개!
# -------------------------
def offline_validation_score_ext(valid_loader):
    model.eval()
    ys, ps, ws = [], [], []
    with torch.no_grad():
        for batch in valid_loader:
            (xb, xrm7b, xrm28b,
             xdw, xmn, xwy, xdy,
             yb, iid, sid, wb, sc,
             fdw, fmn, fwy, fdy) = batch

            B = xb.size(0)
            xb, xrm7b, xrm28b = xb.to(DEVICE), xrm7b.to(DEVICE), xrm28b.to(DEVICE)
            yb  = yb.to(DEVICE)
            wb  = wb.to(DEVICE)
            sc  = sc.to(DEVICE)

            iid = _ensure_1d_ids_lenB(iid.to(DEVICE), B)
            sid = _ensure_1d_ids_lenB(sid.to(DEVICE), B)

            xdw = _clamp_idx_like_module(xdw.to(DEVICE), model.dow_emb)
            xmn = _clamp_idx_like_module(xmn.to(DEVICE), model.mon_emb)
            xwy = _clamp_idx_like_module(xwy.to(DEVICE), model.woy_emb)
            xdy = _clamp_idx_like_module(xdy.to(DEVICE), model.doy_emb)
            fdw = _clamp_idx_like_module(fdw.to(DEVICE), model.dow_emb)
            fmn = _clamp_idx_like_module(fmn.to(DEVICE), model.mon_emb)
            fwy = _clamp_idx_like_module(fwy.to(DEVICE), model.woy_emb)
            fdy = _clamp_idx_like_module(fdy.to(DEVICE), model.doy_emb)

            pred_scaled = model(
                xb, xrm7b, xrm28b,
                xdw, xmn, xwy, xdy,
                iid, sid,
                fdw, fmn, fwy, fdy
            )
            pred_scaled = torch.clamp(pred_scaled, -20.0, 20.0)
            yb          = torch.clamp(yb,         -20.0, 20.0)

            pred_orig = torch.exp(pred_scaled * sc) - 1.0
            true_orig = torch.exp(yb          * sc) - 1.0
            pred_orig = torch.nan_to_num(pred_orig, nan=0.0, posinf=0.0, neginf=0.0).clamp_min(0.0)
            true_orig = torch.nan_to_num(true_orig, nan=0.0, posinf=0.0, neginf=0.0).clamp_min(0.0)

            ys.append(true_orig.cpu().numpy())
            ps.append(pred_orig.cpu().numpy())
            ws.append(wb.cpu().numpy())

    y = np.concatenate(ys, axis=0)   # (N,HZ)
    p = np.concatenate(ps, axis=0)   # (N,HZ)
    w = np.concatenate(ws, axis=0).reshape(-1, 1)  # (N,1)

    mask = (y != 0).astype(np.float32)
    num  = 2.0 * np.abs(y - p)
    den  = np.abs(y) + np.abs(p) + 1e-5
    sm   = (num / den) * mask
    per_sample = sm.sum(axis=1) / np.clip(mask.sum(axis=1), 1.0, None)
    score = float((per_sample.reshape(-1,1) * w).sum() / np.clip(w.sum(), 1.0, None))
    return score

try:
    approx_smape_w = offline_validation_score_ext(valid_loader)
    print(f"[Offline weighted SMAPE] {approx_smape_w:.6f}")
except Exception as e:
    print("Offline validation skipped:", e)


[Offline weighted SMAPE] 0.652261
