In [2]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from datetime import timedelta

# --------------------
# 1. 데이터 불러오기
# --------------------
train_df = pd.read_csv("C:/Users/owner/Desktop/LG Aimers/open/train/processed_with_weekday_holiday.csv")
test_df = pd.read_csv("C:/Users/owner/Desktop/LG Aimers/open/test/aaTEST_00.csv")

# --------------------
# 2. 범주형 인코딩
# --------------------
le_menu = LabelEncoder()
le_store = LabelEncoder()

train_df['메뉴코드'] = le_menu.fit_transform(train_df['메뉴명'])
train_df['업장코드'] = le_store.fit_transform(train_df['영업장명'])

test_df['메뉴코드'] = le_menu.transform(test_df['메뉴명'])
test_df['업장코드'] = le_store.transform(test_df['영업장명'])

# --------------------
# 3. 날짜 변환 및 정렬
# --------------------
train_df['영업일자'] = pd.to_datetime(train_df['영업일자'])
test_df['영업일자'] = pd.to_datetime(test_df['영업일자'])

train_df = train_df.sort_values(['업장코드', '메뉴코드', '영업일자'])
test_df = test_df.sort_values(['업장코드', '메뉴코드', '영업일자'])

# --------------------
# 4. 시퀀스 구성
# --------------------
SEQ_LEN = 14
PRED_DAYS = 7

features = ['요일번호', '공휴일여부']
target_col = '매출수량'

scaler = MinMaxScaler()
train_df[features] = scaler.fit_transform(train_df[features])
test_df[features] = scaler.transform(test_df[features])

class SalesDataset(Dataset):
    def __init__(self, df, seq_len, pred_len):
        self.X = []
        self.y = []
        grouped = df.groupby(['업장코드', '메뉴코드'])
        for _, group in grouped:
            group = group.sort_values('영업일자')
            if len(group) < seq_len + pred_len:
                continue
            for i in range(len(group) - seq_len - pred_len + 1):
                x_seq = group[features].iloc[i:i+seq_len].values
                y_seq = group[target_col].iloc[i+seq_len:i+seq_len+pred_len].values
                self.X.append(x_seq)
                self.y.append(y_seq)

        self.X = torch.tensor(self.X, dtype=torch.float32)
        self.y = torch.tensor(self.y, dtype=torch.float32)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

train_dataset = SalesDataset(train_df, SEQ_LEN, PRED_DAYS)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

# --------------------
# 5. LSTM 모델 정의
# --------------------
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        out, _ = self.lstm(x)
        out = self.fc(out[:, -1, :])  # 마지막 시점 출력
        return out

device = torch.device("cpu")  # CUDA 오류 회피
model = LSTMModel(input_size=len(features), hidden_size=64, output_size=PRED_DAYS).to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# --------------------
# 6. 학습
# --------------------
for epoch in range(20):
    model.train()
    epoch_loss = 0
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        output = model(X_batch)
        loss = criterion(output, y_batch)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {epoch_loss:.4f}")

# --------------------
# 7. 테스트 데이터 예측
# --------------------
pred_results = []
grouped = test_df.groupby(['업장코드', '메뉴코드'])

model.eval()
for (store, menu), group in grouped:
    group = group.sort_values('영업일자')
    if len(group) < SEQ_LEN:
        continue
    x_input = group[features].iloc[-SEQ_LEN:].values
    x_input = torch.tensor(x_input, dtype=torch.float32).unsqueeze(0).to(device)
    with torch.no_grad():
        y_pred = model(x_input).cpu().numpy().flatten()

    for i in range(PRED_DAYS):
        pred_date = group['영업일자'].max() + timedelta(days=i+1)
        pred_results.append({
            '영업장명': le_store.inverse_transform([store])[0],
            '메뉴명': le_menu.inverse_transform([menu])[0],
            '영업일자': pred_date.strftime("%Y-%m-%d"),
            '예측매출수량': max(0, round(y_pred[i]))
        })

# --------------------
# 8. 제출파일 생성
# --------------------
pred_df = pd.DataFrame(pred_results)
pred_df.to_csv("C:/Users/owner/Desktop/LG Aimers/open/submission_lstm.csv", index=False)


  self.X = torch.tensor(self.X, dtype=torch.float32)


Epoch 1, Loss: 2637095.6375
Epoch 2, Loss: 2630334.5194
Epoch 3, Loss: 2630334.0878
Epoch 4, Loss: 2630302.3345
Epoch 5, Loss: 2630362.3061
Epoch 6, Loss: 2630341.9796
Epoch 7, Loss: 2630422.4173
Epoch 8, Loss: 2630194.5570
Epoch 9, Loss: 2630288.2120
Epoch 10, Loss: 2630281.5889
Epoch 11, Loss: 2630238.1039
Epoch 12, Loss: 2629994.1125
Epoch 13, Loss: 2629650.5809
Epoch 14, Loss: 2629308.2869
Epoch 15, Loss: 2628610.2391
Epoch 16, Loss: 2628067.7528
Epoch 17, Loss: 2627552.8788
Epoch 18, Loss: 2626747.0515
Epoch 19, Loss: 2626040.0637
Epoch 20, Loss: 2624764.3494
