# Import

In [4]:
import os
import random
import glob
import re

import pandas as pd
import numpy as np

from sklearn.preprocessing import MinMaxScaler

import torch
import torch.nn as nn
from tqdm import tqdm


# Fixed RandomSeed & Setting Hyperparameter

In [5]:
def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

set_seed(42)

In [6]:
LOOKBACK, PREDICT, BATCH_SIZE, EPOCHS = 28, 7, 16, 50
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Data Load

In [4]:
train = pd.read_csv('./open/train/train.csv')

# Define Model

LSTM

In [54]:
class MultiOutputLSTM(nn.Module):
    def __init__(self, input_dim=1, hidden_dim=64, num_layers=2, output_dim=7):
        super(MultiOutputLSTM, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        out, _ = self.lstm(x)
        return self.fc(out[:, -1, :])  # (B, output_dim)

LSTM+ATTENTION

In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class AdditiveAttention(nn.Module):
    """
    H: (B, T, H)  -> context: (B, H)
    score e_t = v^T tanh(W_h h_t)
    """
    def __init__(self, hidden_dim, attn_dim=128):
        super().__init__()
        self.W_h = nn.Linear(hidden_dim, attn_dim, bias=False)
        self.v   = nn.Linear(attn_dim, 1, bias=False)

    def forward(self, H, mask=None):
        # H: (B, T, H)
        # mask: (B, T) with 1 for valid, 0 for pad (or None)
        e = self.v(torch.tanh(self.W_h(H))).squeeze(-1)   # (B, T)
        if mask is not None:
            e = e.masked_fill(mask == 0, float("-inf"))
        a = torch.softmax(e, dim=-1)                      # (B, T)
        context = torch.bmm(a.unsqueeze(1), H).squeeze(1) # (B, H)
        return context, a

class MultiOutputLSTM(nn.Module):
    def __init__(self, input_dim=1, hidden_dim=64, num_layers=4, output_dim=7, 
                 bidirectional=False, attn_dim=128, dropout=0.2):
        super().__init__()
        self.hidden_dim = hidden_dim * (2 if bidirectional else 1)
        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout if num_layers > 1 else 0.0,
            bidirectional=bidirectional
        )
        self.attn = AdditiveAttention(self.hidden_dim, attn_dim=attn_dim)
        self.fc   = nn.Linear(self.hidden_dim, output_dim)

    def forward(self, x, mask=None, return_attn: bool = False):
        """
        x:    (B, T, input_dim)
        mask: (B, T)  1=valid, 0=pad  (or None)
        """
        H, _ = self.lstm(x)                         # (B, T, H*)
        context, attn_weights = self.attn(H, mask)  # (B, H*), (B, T)
        out = self.fc(context)                      # (B, output_dim)
        if return_attn:
            return out, attn_weights
        return out


# Train

In [None]:
def train_lstm(train_df):
    trained_models = {}

    # ✅ 명시적으로 고정된 features 사용
    features = [
        '매출수량', 'refund_flag', 'refund_amount', 'year', 'month', 'day',
        'quarter', 'dayofweek', 'is_month_start', 'is_month_end', 'weekofmonth',
        'dow_sin', 'dow_cos', 'mon_sin', 'mon_cos', 'is_public_holiday',
        'is_weekend', 'is_holiday_or_weekend', 'lag_1', 'lag_7', 'lag_14',
        'lag_28', 'roll_mean_7', 'ewm_mean_7', 'roll_mean_14', 'ewm_mean_14',
        'roll_mean_28', 'ewm_mean_28', 'spike_prev', 'zero_count_28', 'trend_7',
        'y_log1p', 'is_damha', 'is_miracia', 'menu_mean_sales', 'damha_lag1',
        'damha_roll7', 'miracia_lag1', 'miracia_roll7', 'damha_holiday',
        'miracia_holiday', 'key_dow_mean', 'key_mon_mean', 'dow_ratio',
        'mon_ratio', 'mon_delta', 'season'
    ]

    for store_menu, group in tqdm(train_df.groupby(['영업장명_메뉴명']), desc='Training LSTM'):
        store_train = group.sort_values('영업일자').copy()
        if len(store_train) < LOOKBACK + PREDICT:
            continue

        scaler = MinMaxScaler()
        store_train[features] = scaler.fit_transform(store_train[features])
        train_vals = store_train[features].values  # shape: (N, D)

        # 시퀀스 구성
        X_train, y_train = [], []
        for i in range(len(train_vals) - LOOKBACK - PREDICT + 1):
            X_train.append(train_vals[i:i+LOOKBACK])  # (28, D)
            y_train.append(train_vals[i+LOOKBACK:i+LOOKBACK+PREDICT, 0])  # 매출수량만 예측

        X_train = torch.tensor(X_train).float().to(DEVICE)
        y_train = torch.tensor(y_train).float().to(DEVICE)

        model = MultiOutputLSTM(input_dim=len(features), output_dim=PREDICT).to(DEVICE)
        optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
        criterion = nn.MSELoss()

        model.train()
        for epoch in range(EPOCHS):
            idx = torch.randperm(len(X_train))
            for i in range(0, len(X_train), BATCH_SIZE):
                batch_idx = idx[i:i+BATCH_SIZE]
                X_batch, y_batch = X_train[batch_idx], y_train[batch_idx]
                output = model(X_batch)
                loss = criterion(output, y_batch)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

        trained_models[store_menu] = {
            'model': model.eval(),
            'scaler': scaler,
            'last_sequence': train_vals[-LOOKBACK:]  # (28, D)
        }

    return trained_models, features


In [7]:
# 학습
trained_models = train_lstm(train)

  X_train = torch.tensor(X_train).float().to(DEVICE)
Training LSTM: 100%|██████████| 193/193 [5:43:31<00:00, 106.80s/it]    


In [8]:
# 모델 저장
save_path = "LSTMAttention_models.pth"
torch.save(trained_models, save_path)
print(f"모델이 {save_path}에 저장되었습니다.")


모델이 LSTMAttention_models.pth에 저장되었습니다.


In [9]:
# 모델 불러오기
loaded_models = torch.load("LSTMAttention_models.pth", map_location=DEVICE, weights_only=False)


# Prediction

In [10]:
def predict_lstm(test_df, trained_models, test_prefix: str):
    results = []

    for store_menu, store_test in test_df.groupby(['영업장명_메뉴명']):
        key = store_menu
        if key not in trained_models:
            continue

        model = trained_models[key]['model']
        scaler = trained_models[key]['scaler']

        store_test_sorted = store_test.sort_values('영업일자')
        recent_vals = store_test_sorted['매출수량'].values[-LOOKBACK:]
        if len(recent_vals) < LOOKBACK:
            continue

        # 정규화
        recent_vals = scaler.transform(recent_vals.reshape(-1, 1))
        x_input = torch.tensor([recent_vals]).float().to(DEVICE)

        with torch.no_grad():
            pred_scaled = model(x_input).squeeze().cpu().numpy()

        # 역변환
        restored = []
        for i in range(PREDICT):
            dummy = np.zeros((1, 1))
            dummy[0, 0] = pred_scaled[i]
            restored_val = scaler.inverse_transform(dummy)[0, 0]
            restored.append(max(restored_val, 0))

        # 예측일자: TEST_00+1일 ~ TEST_00+7일
        pred_dates = [f"{test_prefix}+{i+1}일" for i in range(PREDICT)]

        for d, val in zip(pred_dates, restored):
            results.append({
                '영업일자': d,
                '영업장명_메뉴명': store_menu,
                '매출수량': val
            })

    return pd.DataFrame(results)


In [11]:
all_preds = []

# 모든 test_*.csv 순회
test_files = sorted(glob.glob('./open/test/TEST_*.csv'))

for path in test_files:
    test_df = pd.read_csv(path)

    # 파일명에서 접두어 추출 (예: TEST_00)
    filename = os.path.basename(path)
    test_prefix = re.search(r'(TEST_\d+)', filename).group(1)

    pred_df = predict_lstm(test_df, trained_models, test_prefix)
    all_preds.append(pred_df)
    
full_pred_df = pd.concat(all_preds, ignore_index=True)



# Submission

In [12]:
import pandas as pd
import numpy as np

def convert_to_submission_format(pred_df: pd.DataFrame, sample_submission: pd.DataFrame):
    """
    sample_submission의 '컬럼명/순서/공백'은 그대로 유지하고,
    pred_df에서 (영업일자, 메뉴)별 예측 '매출수량'만 채워 넣음.
    - pred_df['영업장명_메뉴명']: (list/tuple)일 수 있으므로 첫 원소 취하고 strip()
    - sample 컬럼명은 절대 strip 하지 않음(업로드 규칙 보호)
    - 값은 음수 0 컷, 반올림, int 변환
    """
    # --- 0) 입력 복사
    final_df = sample_submission.copy()
    
    # --- 1) pred_df 전처리: 메뉴명 정규화(첫 원소 + strip), 날짜 문자열화
    pred_df = pred_df.copy()
    pred_df['영업장명_메뉴명'] = pred_df['영업장명_메뉴명'].apply(
        lambda x: (x[0] if isinstance(x, (list, tuple)) else x)
    ).astype(str).str.strip()
    pred_df['영업일자'] = pred_df['영업일자'].astype(str).str.strip()

    # 혹시 같은 (영업일자, 메뉴) 중복이 있으면 합산(또는 평균 등 정책에 맞게 변경)
    pred_agg = (pred_df
                .groupby(['영업일자', '영업장명_메뉴명'], as_index=True)['매출수량']
                .sum()
               )

    # --- 2) wide 피벗(정규화된 메뉴명 컬럼)
    #      pred_wide.index = 영업일자(str), pred_wide.columns = 메뉴명(strip 적용됨)
    pred_wide = pred_agg.unstack(fill_value=0)

    # --- 3) sample의 기준 준비: 날짜/컬럼 유지(공백 포함)
    id_col = final_df.columns[0]         # 예: '영업일자'
    final_df[id_col] = final_df[id_col].astype(str)  # 날짜를 문자열로 맞춤
    dates = final_df[id_col].tolist()    # 샘플의 행 순서 그대로

    # 샘플의 메뉴명은 strip하지 않음 → 대신 매칭용으로만 strip한 매핑 생성
    sample_cols = list(final_df.columns[1:])
    sample_col_norm_map = {col: str(col).strip() for col in sample_cols}

    # --- 4) pred_wide를 샘플 날짜 순서에 맞게 reindex
    pred_wide = pred_wide.reindex(dates)

    # --- 5) 값 채우기: 샘플의 각 컬럼(공백 포함 이름)을 그대로 두고,
    #                 strip한 이름으로 pred_wide에서 찾아 채움(없으면 0)
    fill_vals = {}
    for raw_col in sample_cols:
        norm_col = sample_col_norm_map[raw_col]
        if (pred_wide is not None) and (norm_col in pred_wide.columns):
            fill_vals[raw_col] = pred_wide[norm_col].to_numpy()
        else:
            # 예측에 해당 메뉴 없음 → 0으로 채움
            fill_vals[raw_col] = np.zeros(len(dates), dtype=float)

    # 채운 값 붙이기
    for raw_col, arr in fill_vals.items():
        final_df[raw_col] = arr



    return final_df


In [13]:
sample_submission = pd.read_csv('./open/sample_submission.csv')
submission = convert_to_submission_format(full_pred_df, sample_submission)
submission.to_csv('LSTMAttention_submission.csv', index=False, encoding='utf-8-sig')

In [14]:
full_pred_df

Unnamed: 0,영업일자,영업장명_메뉴명,매출수량
0,TEST_00+1일,"(느티나무 셀프BBQ_1인 수저세트,)",4.697508
1,TEST_00+2일,"(느티나무 셀프BBQ_1인 수저세트,)",4.366394
2,TEST_00+3일,"(느티나무 셀프BBQ_1인 수저세트,)",4.091449
3,TEST_00+4일,"(느티나무 셀프BBQ_1인 수저세트,)",4.595173
4,TEST_00+5일,"(느티나무 셀프BBQ_1인 수저세트,)",4.228775
...,...,...,...
13505,TEST_09+3일,"(화담숲카페_현미뻥스크림,)",25.257355
13506,TEST_09+4일,"(화담숲카페_현미뻥스크림,)",22.742236
13507,TEST_09+5일,"(화담숲카페_현미뻥스크림,)",25.391198
13508,TEST_09+6일,"(화담숲카페_현미뻥스크림,)",22.208152
