# ❗ 5. SKHynix PBL 시계열 시퀀스 모델링 ❗


## 📌 개요

시간대(timekey_hr) 내에서 공정 순서(oper_id)를 고려한 시퀀스 기반 TAT 예측 모델입니다. 동일한 timekey_hr 내의 oper_id들을 순서대로 정렬하여 시퀀스 데이터로 구성하고, 각 oper별 개별 예측(sequence-to-sequence)을 수행합니다.

**데이터 구조**: `[batch_size, sequence_length, feature_dim]`
- **sequence_length**: 모든 시퀀스를 맞추는 고정 길이(`max_sequence_length`, 하이퍼파라미터). timekey_hr 내 최대 oper_id 개수 이상으로 설정하며, 이보다 짧은 시퀀스는 패딩되고 긴 시퀀스는 잘립니다.
- **feature_dim**: 연속형 변수 개수 + 범주형 변수 개수 × 임베딩 차원

**지원 모델**:
1. **RNN/LSTM/GRU**: 기본 순환 신경망
2. **RNN + Self-Attention**: 순환 신경망에 어텐션 메커니즘 추가
3. **CNN 1D**: 다중 커널 1차원 합성곱 신경망

## 🔧 환경 설정 및 라이브러리

In [2]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
import numpy as np
import yaml
import logging
import json
from datetime import datetime
from tqdm import tqdm
from typing import Dict, List, Tuple, Optional, Union

# sklearn
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error

# PyTorch
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence
from torch.optim import Adam, AdamW
from torch.optim.lr_scheduler import ReduceLROnPlateau, StepLR

## 📊 유틸리티 함수들

### 설정 로딩 및 시드 설정

In [3]:
def load_config(config_dir: str = "configs") -> Dict:
    """Load and merge the project's YAML configuration files.

    Reads ``dataset.yaml``, ``model.yaml`` and ``training.yaml`` from
    ``config_dir`` (in that order) and merges their top-level keys into one
    flat dictionary. When the same key appears in more than one file, the
    file read later wins.

    Args:
        config_dir: Directory that contains the three YAML files.

    Returns:
        A single dictionary holding every top-level key of all three files.

    Raises:
        FileNotFoundError: If one of the expected files does not exist.
        TypeError: If the top level of a file is not a mapping.
    """
    merged: Dict = {}

    for stem in ("dataset", "model", "training"):
        config_path = os.path.join(config_dir, f"{stem}.yaml")
        with open(config_path, "r", encoding="utf-8") as f:
            # safe_load returns None for an empty (or comment-only) file;
            # treat that as "no settings" instead of crashing in update().
            section = yaml.safe_load(f) or {}

        if not isinstance(section, dict):
            raise TypeError(
                f"{config_path} must contain a top-level mapping, "
                f"got {type(section).__name__}"
            )
        merged.update(section)

    return merged


def set_random_seeds(seed: int = 42):
    """Seed every random number generator used here, for reproducibility.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU and all CUDA devices), then switches cuDNN to deterministic mode
    and disables its auto-tuner.

    Args:
        seed: Seed value applied to every generator.
    """
    # Function-scope import: `random` is not among the file-level imports.
    import random

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seeds every CUDA device, which includes the current one, so a separate
    # torch.cuda.manual_seed(seed) call is not needed.
    torch.cuda.manual_seed_all(seed)

    # Trade speed for repeatable cuDNN kernels.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


def setup_logging(log_file: str = "training.log"):
    """Configure root logging to write to a file and to the console.

    Args:
        log_file: Path handed to ``logging.FileHandler``.

    Returns:
        The logger named after this module (``__name__``).
    """
    # File handler first, console handler second.
    log_handlers = [logging.FileHandler(log_file), logging.StreamHandler()]
    log_format = "%(asctime)s - %(levelname)s - %(message)s"

    logging.basicConfig(level=logging.INFO, format=log_format, handlers=log_handlers)
    return logging.getLogger(__name__)

# Module-level logger used by the code below. Calling setup_logging() with its
# default argument opens "training.log" in the current working directory.
logger = setup_logging()

### 범주형 데이터 처리기

In [5]:
class CategoricalProcessor:
    """Integer-encode categorical columns for use with ``nn.Embedding``.

    Index 0 is reserved for padding. The collate function in this file pads
    categorical tensors with 0 and the models build their embeddings with
    ``padding_idx=0``, so a real category must never be encoded as 0 -
    otherwise its embedding would stay a frozen all-zero vector. Real
    categories are therefore encoded as ``1 .. n_classes`` and every
    vocabulary size is ``n_classes + 1``.

    NOTE: the fitted ``LabelEncoder`` objects in ``label_encoders`` still
    work on the unshifted ``0 .. n_classes - 1`` range; subtract 1 from an
    encoded value before calling ``inverse_transform`` on them.
    """

    # Index reserved for padded positions; real categories start right after.
    PADDING_INDEX = 0

    def __init__(self, embedding_dim: int = 8):
        self.embedding_dim = embedding_dim
        self.label_encoders = {}
        self.vocab_sizes = {}
        self.categorical_columns = []

    def fit(self, df: pd.DataFrame, categorical_columns: List[str]):
        """Fit one label encoder per categorical column.

        Args:
            df: Frame containing every value the encoders must know about.
            categorical_columns: Names of the columns to encode. Values are
                compared as strings.
        """
        self.categorical_columns = categorical_columns

        for col in categorical_columns:
            encoder = LabelEncoder()
            encoder.fit(df[col].astype(str).unique())

            self.label_encoders[col] = encoder
            # One extra slot for the reserved padding index.
            self.vocab_sizes[col] = len(encoder.classes_) + 1

        logger.info(f"범주형 변수별 고유값 개수:")
        for col in categorical_columns:
            # Report the number of real categories (padding slot excluded).
            logger.info(f"  {col}: {len(self.label_encoders[col].classes_)}개")

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return a copy of ``df`` with the categorical columns encoded.

        Encoded values lie in ``1 .. n_classes``; 0 is never produced so it
        can serve as the padding index.

        Raises:
            ValueError: Raised by the underlying encoder when a column
                contains a value that was not present during ``fit``.
        """
        df_encoded = df.copy()

        for col in self.categorical_columns:
            codes = self.label_encoders[col].transform(df_encoded[col].astype(str))
            # Shift by one so that code 0 stays free for padding.
            df_encoded[col] = codes + 1

        return df_encoded

    def get_vocab_sizes(self) -> List[int]:
        """Return the embedding vocabulary size of each categorical column.

        Sizes follow the order of ``categorical_columns`` and include the
        reserved padding slot.
        """
        return [self.vocab_sizes[col] for col in self.categorical_columns]

## 🗂️ 시퀀스 데이터셋 클래스

### 메인 데이터셋

In [6]:
class SequenceOperDataset(Dataset):
    """Dataset that yields one oper sequence per ``timekey_hr`` group.

    Rows sharing a ``timekey_hr`` value are sorted by ``oper_id`` and stored
    as one sample made of NumPy arrays (continuous features, encoded
    categorical features, targets) plus bookkeeping fields. No padding is
    applied here.
    """

    def __init__(
        self,
        df: pd.DataFrame,
        categorical_columns: List[str],
        continuous_columns: List[str],
        target_column: str = "y",
        categorical_processor: Optional[CategoricalProcessor] = None,
        max_sequence_length: int = 50,
        embedding_dim: int = 8,
        padding_value: float = -999999.0
    ):
        """Encode the frame and split it into per-``timekey_hr`` sequences.

        Args:
            df: Source rows; copied, so the caller's frame is not modified.
            categorical_columns: Columns to integer-encode.
            continuous_columns: Columns used as-is.
            target_column: Column holding the regression target.
            categorical_processor: Already fitted encoder to reuse. When
                ``None`` a new one is fitted on ``df``.
            max_sequence_length: Stored and logged; not applied here.
            embedding_dim: Embedding width used to compute ``feature_dim``.
            padding_value: Stored and logged; not applied here.
        """
        # Work on a private copy of the rows.
        self.df = df.copy()
        self.categorical_columns = categorical_columns
        self.continuous_columns = continuous_columns
        self.target_column = target_column
        self.max_sequence_length = max_sequence_length
        self.embedding_dim = embedding_dim
        self.padding_value = padding_value

        # Reuse the supplied encoder, or fit a fresh one on the caller's frame.
        if categorical_processor is not None:
            self.categorical_processor = categorical_processor
        else:
            self.categorical_processor = CategoricalProcessor(embedding_dim)
            self.categorical_processor.fit(df, categorical_columns)

        # Encode first, then cut the frame into sequences.
        self._preprocess_data()
        self._create_sequences()

        logger.info(f"시퀀스 데이터셋 구성 완료:")
        logger.info(f"  - 총 시퀀스 수: {len(self.sequences)}")
        logger.info(f"  - 최대 시퀀스 길이: {max_sequence_length}")
        logger.info(f"  - 특성 차원: {self.feature_dim}")
        logger.info(f"  - 패딩값: {padding_value}")

    def _preprocess_data(self):
        """Encode the categorical columns in place and compute ``feature_dim``."""
        cat_cols = self.categorical_columns
        if cat_cols:
            self.df[cat_cols] = self.categorical_processor.transform(self.df[cat_cols])

        # Each categorical column contributes one embedding vector.
        self.feature_dim = len(self.continuous_columns) + len(cat_cols) * self.embedding_dim

    @staticmethod
    def _feature_block(frame, columns):
        """Return ``frame[columns]`` as an array, or an ``(n_rows, 0)`` array when ``columns`` is empty."""
        if columns:
            return frame[columns].values
        return np.empty((len(frame), 0))

    def _create_sequences(self):
        """Build one record per ``timekey_hr`` group, ordered by ``oper_id``."""
        self.sequences = []

        for hour_key, rows in self.df.groupby('timekey_hr'):
            ordered = rows.sort_values('oper_id').reset_index(drop=True)
            n_steps = len(ordered)
            if n_steps == 0:
                continue

            self.sequences.append({
                'timekey_hr': hour_key,
                'continuous_data': self._feature_block(ordered, self.continuous_columns),
                # Integer codes; turned into embeddings later by the model.
                'categorical_data': self._feature_block(ordered, self.categorical_columns),
                'target_data': ordered[self.target_column].values,
                # Kept for debugging / traceability.
                'oper_ids': ordered['oper_id'].values,
                'sequence_length': n_steps,
            })

    def __len__(self):
        """Return the number of sequences."""
        return len(self.sequences)

    def __getitem__(self, idx):
        """Return the stored record at ``idx`` as a new dict."""
        record = self.sequences[idx]
        field_order = (
            'continuous_data',
            'categorical_data',
            'target_data',
            'sequence_length',
            'timekey_hr',
            'oper_ids',
        )
        return {field: record[field] for field in field_order}


def sequence_collate_fn(batch, max_sequence_length: int, padding_value: float = -999999.0):
    """Pad a list of sequence samples into fixed-length batch tensors.

    Every sample is cut to at most ``max_sequence_length`` steps and
    right-padded up to exactly that length.

    Args:
        batch: Non-empty list of sample dicts with the keys
            ``continuous_data``, ``categorical_data``, ``target_data``,
            ``sequence_length``, ``timekey_hr`` and ``oper_ids``.
        max_sequence_length: Length of the time axis of every output.
        padding_value: Fill value for padded continuous features and targets.

    Returns:
        Dict with float32 ``continuous_data`` and ``targets``, int64
        ``categorical_data`` (padded with 0), boolean ``masks`` (True marks
        padding), plus the plain lists ``sequence_lengths``, ``timekey_hrs``
        and ``oper_ids_list`` (oper ids padded with ``None``).
    """
    n_samples = len(batch)

    # Feature widths are taken from the first sample.
    n_continuous = batch[0]['continuous_data'].shape[1]
    n_categorical = batch[0]['categorical_data'].shape[1]
    frame = (n_samples, max_sequence_length)

    # Buffers start out fully padded; real data is written over the prefix.
    padded_continuous = np.full(frame + (n_continuous,), padding_value, dtype=np.float32)
    padded_categorical = np.full(frame + (n_categorical,), 0, dtype=np.int64)
    padded_targets = np.full(frame, padding_value, dtype=np.float32)
    padding_masks = np.ones(frame, dtype=bool)

    lengths, hour_keys, oper_id_rows = [], [], []

    for row, sample in enumerate(batch):
        keep = min(sample['sequence_length'], max_sequence_length)
        lengths.append(keep)
        window = slice(0, keep)

        padded_continuous[row, window] = sample['continuous_data'][window]
        padded_categorical[row, window] = sample['categorical_data'][window]
        padded_targets[row, window] = sample['target_data'][window]
        padding_masks[row, window] = False

        hour_keys.append(sample['timekey_hr'])

        # Oper ids are padded with None up to max_sequence_length.
        kept_ids = list(sample['oper_ids'][window])
        kept_ids.extend([None] * (max_sequence_length - len(kept_ids)))
        oper_id_rows.append(kept_ids)

    return {
        'continuous_data': torch.tensor(padded_continuous),
        'categorical_data': torch.tensor(padded_categorical),
        'targets': torch.tensor(padded_targets),
        'masks': torch.tensor(padding_masks),
        'sequence_lengths': lengths,
        'timekey_hrs': hour_keys,
        'oper_ids_list': oper_id_rows
    }


def create_dataloaders(
    dataset_config: Dict,
) -> Tuple[DataLoader, DataLoader, DataLoader, "CategoricalProcessor"]:
    """Load the Excel data and build train/validation/test data loaders.

    Steps: read the configured sheets, drop rows without a target, drop the
    configured extra columns, fit one categorical encoder on the complete
    frame, split the rows by position into train/validation/test, and wrap
    each part in a ``SequenceOperDataset`` with its ``DataLoader``.

    Args:
        dataset_config: Settings dictionary. Required keys: ``file_path``,
            ``sheet_names``, ``target_column``, ``categorical_columns``,
            ``continuous_columns``. Optional keys (default): ``embedding_dim``
            (8), ``max_sequence_length`` (50), ``padding_value`` (-999999.0),
            ``train_ratio`` (0.8), ``val_ratio`` (0.1), ``batch_size`` (32),
            ``num_workers`` (4), ``additional_drop_columns`` (none).

    Returns:
        ``(train_loader, val_loader, test_loader, categorical_processor)``.

    Raises:
        KeyError: If a required key or a configured sheet is missing.
    """
    # Function-scope import: `functools` is not among the file-level imports.
    from functools import partial

    # Read every setting once instead of repeating the lookups below.
    target_column = dataset_config["target_column"]
    categorical_columns = dataset_config["categorical_columns"]
    continuous_columns = dataset_config["continuous_columns"]
    embedding_dim = dataset_config.get("embedding_dim", 8)
    max_sequence_length = dataset_config.get("max_sequence_length", 50)
    padding_value = dataset_config.get("padding_value", -999999.0)
    train_ratio = dataset_config.get("train_ratio", 0.8)
    val_ratio = dataset_config.get("val_ratio", 0.1)
    batch_size = dataset_config.get("batch_size", 32)
    num_workers = dataset_config.get("num_workers", 4)

    # sheet_name=None loads every sheet; header=1 uses the second row as header.
    excel = pd.read_excel(dataset_config["file_path"], sheet_name=None, header=1)
    total_df = pd.concat([excel[sheet_name] for sheet_name in dataset_config["sheet_names"]])

    if "Unnamed: 0" in total_df.columns:
        total_df = total_df.drop(columns="Unnamed: 0")

    # Rows without a target value cannot be used.
    df = total_df[total_df[target_column].notna()].copy()

    # `or []` also covers a key that is present but empty (None) in the YAML.
    drop_columns = dataset_config.get("additional_drop_columns") or []
    existing_drops = [col for col in drop_columns if col in df.columns]
    if existing_drops:
        df = df.drop(columns=existing_drops)

    df.reset_index(drop=True, inplace=True)

    # The encoder is fitted on the complete frame so that validation and test
    # rows never contain a category it has not seen.
    categorical_processor = CategoricalProcessor(embedding_dim=embedding_dim)
    categorical_processor.fit(df, categorical_columns)

    # Positional split (default 8:1:1).
    # NOTE(review): the cut is made on rows, not on timekey_hr groups, so a
    # group that straddles a boundary is split into two shorter sequences -
    # confirm this is intended.
    total_size = len(df)
    train_end = int(total_size * train_ratio)
    val_end = int(total_size * (train_ratio + val_ratio))

    train_df = df[:train_end].copy()
    val_df = df[train_end:val_end].copy()
    test_df = df[val_end:].copy()

    def build_dataset(frame):
        # All three splits share the same columns, encoder and padding setup.
        return SequenceOperDataset(
            df=frame,
            categorical_columns=categorical_columns,
            continuous_columns=continuous_columns,
            target_column=target_column,
            categorical_processor=categorical_processor,
            max_sequence_length=max_sequence_length,
            embedding_dim=embedding_dim,
            padding_value=padding_value,
        )

    train_dataset = build_dataset(train_df)
    val_dataset = build_dataset(val_df)
    test_dataset = build_dataset(test_df)

    # functools.partial of a module-level function can be pickled, unlike a
    # function defined inside this one; worker processes started with the
    # "spawn" method need a picklable collate_fn.
    collate_fn = partial(
        sequence_collate_fn,
        max_sequence_length=max_sequence_length,
        padding_value=padding_value,
    )

    # Pinned memory only helps host-to-GPU copies.
    pin_memory = torch.cuda.is_available()

    def build_loader(dataset, shuffle):
        return DataLoader(
            dataset,
            batch_size=batch_size,
            shuffle=shuffle,
            collate_fn=collate_fn,
            num_workers=num_workers,
            pin_memory=pin_memory,
        )

    # Only the training split is shuffled.
    train_loader = build_loader(train_dataset, shuffle=True)
    val_loader = build_loader(val_dataset, shuffle=False)
    test_loader = build_loader(test_dataset, shuffle=False)

    logger.info(f"데이터로더 생성 완료:")
    logger.info(f"  - 훈련 샘플: {len(train_dataset)}")
    logger.info(f"  - 검증 샘플: {len(val_dataset)}")
    logger.info(f"  - 테스트 샘플: {len(test_dataset)}")
    logger.info(f"  - 배치 크기: {batch_size}")

    return train_loader, val_loader, test_loader, categorical_processor

In [8]:
# Load every sheet of the workbook into a dict of DataFrames (sheet_name=None);
# header=1 takes the second row of each sheet as the column names.
excel = pd.read_excel("/home/doyooni303/teaching/SK_Hynix/PBL_TAT_Set_Data Rev2(사외).xlsx", sheet_name=None, header=1)

# Stack the two data sheets into one frame; each sheet keeps its own row index.
total = pd.concat([excel[sheet_name] for sheet_name in ["Data_Set1(사외)","Data_Set2(사외)"]] )


In [None]:
# Remove the "Unnamed: 0" column in place. Requires `total` from the cell above
# and raises KeyError when the column is absent.
total.drop(columns="Unnamed: 0", inplace=True)

NameError: name 'total' is not defined

In [None]:
# Driver cell: parse options, load the configuration, fix the seeds, choose the
# output path and device, and build the data loaders.
import argparse

parser = argparse.ArgumentParser(description="시계열 시퀀스 모델링")
parser.add_argument("--config-dir", default="configs", help="설정 파일 디렉토리")
parser.add_argument("--mode", choices=["train", "eval"], default="train", help="실행 모드")
parser.add_argument("--model-path", default=None, help="평가용 모델 경로")
parser.add_argument("--gpu", type=int, default=0, help="GPU 번호")
parser.add_argument("--exp-name", default="code-test", help="실험명")

# An explicit empty argument list means every option takes its default and
# sys.argv is ignored (presumably so the cell can run inside a notebook).
args = parser.parse_args([])

# Load the merged configuration and make the run reproducible.
config = load_config(args.config_dir)
set_random_seeds(42)

# Experiment name: the --exp-name value when it is non-empty, otherwise
# "<model_type>_<timestamp>".
if args.exp_name:
    exp_name = args.exp_name
else:
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    model_type = config.get("model_type", "lstm")
    exp_name = f"{model_type}_{timestamp}"

# Directory and file path for the saved model.
save_dir = config.get("save_dir", "models")
os.makedirs(save_dir, exist_ok=True)
model_save_path = os.path.join(save_dir, f"{exp_name}.pth")

# Use the requested GPU when CUDA is available, otherwise the CPU.
device = torch.device(f"cuda:{args.gpu}" if torch.cuda.is_available() else "cpu")
logger.info(f"Using device: {device}")

# Build the data loaders; the whole merged config is passed as dataset config.
logger.info("데이터 로딩 중...")
train_loader, val_loader, test_loader, categorical_processor = create_dataloaders(config)

## 🏗️ 모델 아키텍처들

### RNN 기본 모델 (models/rnn.py)

In [5]:
class RNNModel(nn.Module):
    """Per-step regression model built on an RNN, LSTM or GRU.

    Each categorical variable gets its own embedding table. The embeddings
    are concatenated with the continuous features and run through the
    recurrent network; a small MLP then maps every time step to one value.

    Padded steps never reach the recurrent network: the input is zero-filled
    at padded positions and packed with the true sequence lengths. Feeding
    the padding sentinel (-999999.0 by default) into the network would
    saturate its gates, and in a bidirectional network the backward pass
    starts at the padded end, so valid steps would be corrupted as well.
    """

    def __init__(
        self,
        vocab_sizes: List[int],  # number of embedding rows per categorical variable, e.g. [277, 7, 3, 20]
        continuous_dim: int,
        embedding_dim: int = 8,
        rnn_type: str = "LSTM",
        hidden_dim: int = 128,
        num_layers: int = 2,
        dropout: float = 0.1,
        bidirectional: bool = True,
        padding_value: float = -999999.0
    ):
        """Build the embeddings, the recurrent network and the output head.

        Args:
            vocab_sizes: Embedding table size of each categorical variable.
            continuous_dim: Number of continuous features per step.
            embedding_dim: Width of every categorical embedding.
            rnn_type: "LSTM" or "GRU" (case-insensitive); any other value
                selects a plain ``nn.RNN``.
            hidden_dim: Hidden size of the recurrent network and the head.
            num_layers: Number of stacked recurrent layers.
            dropout: Dropout probability; applied between recurrent layers
                only when ``num_layers > 1``.
            bidirectional: Whether the recurrent network is bidirectional.
            padding_value: Sentinel the data pipeline uses for padded
                values. Stored for reference; it is not fed to the network.
        """
        super().__init__()

        self.embedding_dim = embedding_dim
        self.continuous_dim = continuous_dim
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.bidirectional = bidirectional
        self.padding_value = padding_value
        self.num_categorical_vars = len(vocab_sizes)

        # One embedding table per categorical variable; index 0 is padding.
        self.embeddings = nn.ModuleList([
            nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
            for vocab_size in vocab_sizes
        ])

        # e.g. continuous_dim=20, 4 categorical variables, embedding_dim=8
        # -> input_dim = 20 + 4 * 8 = 52
        input_dim = continuous_dim + self.num_categorical_vars * embedding_dim

        # Unknown type names fall back to the plain RNN.
        rnn_classes = {"LSTM": nn.LSTM, "GRU": nn.GRU}
        rnn_cls = rnn_classes.get(rnn_type.upper(), nn.RNN)
        self.rnn = rnn_cls(
            input_dim, hidden_dim, num_layers,
            batch_first=True, dropout=dropout if num_layers > 1 else 0,
            bidirectional=bidirectional
        )

        # A bidirectional network concatenates both directions.
        rnn_output_dim = hidden_dim * (2 if bidirectional else 1)

        self.output_layer = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(rnn_output_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, 1)
        )

    def forward(self, continuous_data, categorical_data, masks, sequence_lengths):
        """Predict one value per time step.

        Args:
            continuous_data: [batch_size, seq_len, continuous_dim]
            categorical_data: [batch_size, seq_len, num_categorical_vars]
                integer indices.
            masks: [batch_size, seq_len] boolean, True marks padding.
                Padding is expected at the end of each sequence.
            sequence_lengths: Number of valid steps per sequence (list or
                tensor). When ``None`` the lengths are derived from ``masks``.

        Returns:
            [batch_size, seq_len] predictions, 0.0 at padded positions.
        """
        batch_size, seq_len = continuous_data.shape[:2]

        # Column i of categorical_data goes through embedding table i.
        embedded_categorical = [
            embedding_layer(categorical_data[:, :, i])
            for i, embedding_layer in enumerate(self.embeddings)
        ]

        if embedded_categorical:
            categorical_embedded = torch.cat(embedded_categorical, dim=-1)
        else:
            categorical_embedded = torch.empty(batch_size, seq_len, 0, device=continuous_data.device)

        # [batch_size, seq_len, continuous_dim + num_categorical_vars * embedding_dim]
        combined_input = torch.cat([continuous_data, categorical_embedded], dim=-1)

        # Neutralise padded steps with zeros instead of the huge sentinel.
        combined_input = combined_input.masked_fill(masks.unsqueeze(-1), 0.0)

        if sequence_lengths is None:
            lengths = (~masks).sum(dim=1)
        else:
            lengths = torch.as_tensor(sequence_lengths, dtype=torch.int64)
        # pack_padded_sequence needs CPU lengths in [1, seq_len].
        lengths = lengths.clamp(min=1, max=seq_len).cpu()

        # Packing makes the network stop at each sequence's true end.
        packed_input = pack_padded_sequence(
            combined_input, lengths, batch_first=True, enforce_sorted=False
        )
        packed_output, _ = self.rnn(packed_input)
        # total_length restores the full padded time axis.
        rnn_output, _ = pad_packed_sequence(
            packed_output, batch_first=True, total_length=seq_len
        )

        predictions = self.output_layer(rnn_output).squeeze(-1)

        # Padded positions are reported as 0.
        predictions = predictions.masked_fill(masks, 0.0)

        return predictions


### RNN + Self-Attention 모델 (models/attention.py)

In [6]:
class SelfAttention(nn.Module):
    """Multi-head scaled dot-product self-attention with residual + LayerNorm.

    There is no output projection after the heads are merged; the merged
    heads are added to the input and layer-normalised.
    """

    def __init__(self, hidden_dim: int, num_heads: int = 8, dropout: float = 0.1):
        """Create the projections.

        Args:
            hidden_dim: Width of the input and output; must be divisible by
                ``num_heads``.
            num_heads: Number of attention heads.
            dropout: Dropout probability applied to the attention weights.
        """
        super().__init__()

        assert hidden_dim % num_heads == 0, "hidden_dim must be divisible by num_heads"

        self.hidden_dim = hidden_dim
        self.num_heads = num_heads
        self.head_dim = hidden_dim // num_heads

        self.query = nn.Linear(hidden_dim, hidden_dim)
        self.key = nn.Linear(hidden_dim, hidden_dim)
        self.value = nn.Linear(hidden_dim, hidden_dim)

        self.dropout = nn.Dropout(dropout)
        self.layer_norm = nn.LayerNorm(hidden_dim)

    def _split_heads(self, projected, batch_size, seq_len):
        """Reshape [batch, seq, hidden] to [batch, heads, seq, head_dim]."""
        return projected.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2)

    def forward(self, x, mask=None):
        """Apply self-attention.

        Args:
            x: [batch_size, seq_len, hidden_dim]
            mask: Optional [batch_size, seq_len] boolean, True marks padding.
                Masked positions are excluded as keys.

        Returns:
            [batch_size, seq_len, hidden_dim]. A sequence whose positions
            are all masked receives no attention contribution, so its output
            is the layer norm of its input.
        """
        batch_size, seq_len, _ = x.shape

        Q = self._split_heads(self.query(x), batch_size, seq_len)
        K = self._split_heads(self.key(x), batch_size, seq_len)
        V = self._split_heads(self.value(x), batch_size, seq_len)

        # Scaled dot-product scores: [batch, heads, seq_len, seq_len]
        attention_scores = torch.matmul(Q, K.transpose(-2, -1)) / (self.head_dim ** 0.5)

        if mask is not None:
            attention_mask = mask.unsqueeze(1).unsqueeze(1)  # [batch, 1, 1, seq_len]
            attention_scores = attention_scores.masked_fill(attention_mask, float('-inf'))

        attention_weights = F.softmax(attention_scores, dim=-1)
        # Softmax over a row of only -inf gives NaN (every key masked);
        # use zero weights there so NaN cannot spread into the loss.
        attention_weights = torch.nan_to_num(attention_weights, nan=0.0)
        attention_weights = self.dropout(attention_weights)

        attended = torch.matmul(attention_weights, V)
        attended = attended.transpose(1, 2).contiguous().view(batch_size, seq_len, self.hidden_dim)

        # Residual connection followed by layer norm.
        output = self.layer_norm(x + attended)

        return output


class RNNAttentionModel(nn.Module):
    """Recurrent network followed by self-attention, predicting per step.

    Categorical embeddings and continuous features are concatenated, run
    through an RNN/LSTM/GRU, projected to ``hidden_dim``, refined by
    ``SelfAttention`` and mapped to one value per time step.

    Padded steps never reach the recurrent network: the input is zero-filled
    at padded positions and packed with the true sequence lengths. Feeding
    the padding sentinel (-999999.0 by default) into the network would
    saturate its gates, and in a bidirectional network the backward pass
    starts at the padded end, so valid steps would be corrupted as well.
    """

    def __init__(
        self,
        vocab_sizes: List[int],  # number of embedding rows per categorical variable, e.g. [277, 7, 3, 20]
        continuous_dim: int,
        embedding_dim: int = 8,
        rnn_type: str = "LSTM",
        hidden_dim: int = 128,
        num_layers: int = 2,
        num_attention_heads: int = 8,
        dropout: float = 0.1,
        bidirectional: bool = True,
        padding_value: float = -999999.0
    ):
        """Build embeddings, recurrent network, attention and output head.

        Args:
            vocab_sizes: Embedding table size of each categorical variable.
            continuous_dim: Number of continuous features per step.
            embedding_dim: Width of every categorical embedding.
            rnn_type: "LSTM" or "GRU" (case-insensitive); any other value
                selects a plain ``nn.RNN``.
            hidden_dim: Hidden size of the recurrent network, the attention
                layer and the head.
            num_layers: Number of stacked recurrent layers.
            num_attention_heads: Number of attention heads.
            dropout: Dropout probability; applied between recurrent layers
                only when ``num_layers > 1``.
            bidirectional: Whether the recurrent network is bidirectional.
            padding_value: Sentinel the data pipeline uses for padded
                values. Stored for reference; it is not fed to the network.
        """
        super().__init__()

        self.embedding_dim = embedding_dim
        self.continuous_dim = continuous_dim
        self.hidden_dim = hidden_dim
        self.padding_value = padding_value
        self.num_categorical_vars = len(vocab_sizes)

        # One embedding table per categorical variable; index 0 is padding.
        self.embeddings = nn.ModuleList([
            nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
            for vocab_size in vocab_sizes
        ])

        input_dim = continuous_dim + self.num_categorical_vars * embedding_dim

        # Unknown type names fall back to the plain RNN.
        rnn_classes = {"LSTM": nn.LSTM, "GRU": nn.GRU}
        rnn_cls = rnn_classes.get(rnn_type.upper(), nn.RNN)
        self.rnn = rnn_cls(
            input_dim, hidden_dim, num_layers,
            batch_first=True, dropout=dropout if num_layers > 1 else 0,
            bidirectional=bidirectional
        )

        # A bidirectional network concatenates both directions.
        rnn_output_dim = hidden_dim * (2 if bidirectional else 1)

        # Bring the recurrent output to the width the attention layer expects.
        self.rnn_projection = nn.Linear(rnn_output_dim, hidden_dim)

        self.self_attention = SelfAttention(
            hidden_dim, num_attention_heads, dropout
        )

        self.output_layer = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, 1)
        )

    def forward(self, continuous_data, categorical_data, masks, sequence_lengths):
        """Predict one value per time step.

        Args:
            continuous_data: [batch_size, seq_len, continuous_dim]
            categorical_data: [batch_size, seq_len, num_categorical]
                integer indices.
            masks: [batch_size, seq_len] boolean, True marks padding.
                Padding is expected at the end of each sequence.
            sequence_lengths: Number of valid steps per sequence (list or
                tensor). When ``None`` the lengths are derived from ``masks``.

        Returns:
            [batch_size, seq_len] predictions, 0.0 at padded positions.
        """
        batch_size, seq_len = continuous_data.shape[:2]

        # Column i of categorical_data goes through embedding table i.
        embedded_categorical = [
            embedding(categorical_data[:, :, i])
            for i, embedding in enumerate(self.embeddings)
        ]

        if embedded_categorical:
            categorical_embedded = torch.cat(embedded_categorical, dim=-1)
        else:
            categorical_embedded = torch.empty(batch_size, seq_len, 0, device=continuous_data.device)

        combined_input = torch.cat([continuous_data, categorical_embedded], dim=-1)

        # Neutralise padded steps with zeros instead of the huge sentinel.
        combined_input = combined_input.masked_fill(masks.unsqueeze(-1), 0.0)

        if sequence_lengths is None:
            lengths = (~masks).sum(dim=1)
        else:
            lengths = torch.as_tensor(sequence_lengths, dtype=torch.int64)
        # pack_padded_sequence needs CPU lengths in [1, seq_len].
        lengths = lengths.clamp(min=1, max=seq_len).cpu()

        # Packing makes the network stop at each sequence's true end.
        packed_input = pack_padded_sequence(
            combined_input, lengths, batch_first=True, enforce_sorted=False
        )
        packed_output, _ = self.rnn(packed_input)
        # total_length restores the full padded time axis.
        rnn_output, _ = pad_packed_sequence(
            packed_output, batch_first=True, total_length=seq_len
        )

        projected_output = self.rnn_projection(rnn_output)

        # Padded positions are excluded as attention keys via the mask.
        attended_output = self.self_attention(projected_output, masks)

        predictions = self.output_layer(attended_output).squeeze(-1)

        # Padded positions are reported as 0.
        predictions = predictions.masked_fill(masks, 0.0)

        return predictions

### CNN 1D 모델 (models/cnn.py)

In [7]:
class CNN1DModel(nn.Module):
    """Multi-kernel 1D CNN that predicts one value per time step.

    Categorical embeddings and continuous features are concatenated and fed
    to several parallel ``Conv1d`` branches (one per kernel size). The
    branch outputs are concatenated per step and mapped to one value.

    Padded steps are zero-filled before the convolutions. Leaving the
    padding sentinel (-999999.0 by default) in place would leak it into
    every valid step whose convolution window overlaps the padding.
    """

    def __init__(
        self,
        vocab_sizes: List[int],  # number of embedding rows per categorical variable, e.g. [277, 7, 3, 20]
        continuous_dim: int,
        embedding_dim: int = 8,
        kernel_sizes: List[int] = [3, 5, 7],
        num_filters: int = 64,
        dropout: float = 0.1,
        padding_value: float = -999999.0
    ):
        """Build the embeddings, the convolution branches and the head.

        Args:
            vocab_sizes: Embedding table size of each categorical variable.
            continuous_dim: Number of continuous features per step.
            embedding_dim: Width of every categorical embedding.
            kernel_sizes: One convolution branch is created per entry. Even
                sizes are supported; their output is trimmed to the input
                length.
            num_filters: Output channels of every branch.
            dropout: Dropout probability used in the output head.
            padding_value: Sentinel the data pipeline uses for padded
                values. Stored for reference; it is not fed to the network.
        """
        super().__init__()

        self.embedding_dim = embedding_dim
        self.continuous_dim = continuous_dim
        # Copy so the shared default list can never be modified through us.
        self.kernel_sizes = list(kernel_sizes)
        self.num_filters = num_filters
        self.padding_value = padding_value
        self.num_categorical_vars = len(vocab_sizes)

        # One embedding table per categorical variable; index 0 is padding.
        self.embeddings = nn.ModuleList([
            nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
            for vocab_size in vocab_sizes
        ])

        input_dim = continuous_dim + self.num_categorical_vars * embedding_dim

        # padding=k//2 keeps the length for odd k; even k yields one extra
        # step, which forward() trims away.
        self.conv_layers = nn.ModuleList([
            nn.Conv1d(input_dim, num_filters, kernel_size, padding=kernel_size//2)
            for kernel_size in self.kernel_sizes
        ])

        self.batch_norms = nn.ModuleList([
            nn.BatchNorm1d(num_filters) for _ in self.kernel_sizes
        ])

        total_filters = len(self.kernel_sizes) * num_filters
        self.output_layer = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(total_filters, total_filters // 2),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(total_filters // 2, 1)
        )

        self.dropout = nn.Dropout(dropout)

    def forward(self, continuous_data, categorical_data, masks, sequence_lengths):
        """Predict one value per time step.

        Args:
            continuous_data: [batch_size, seq_len, continuous_dim]
            categorical_data: [batch_size, seq_len, num_categorical]
                integer indices.
            masks: [batch_size, seq_len] boolean, True marks padding.
            sequence_lengths: Unused; accepted so all models share one
                call signature.

        Returns:
            [batch_size, seq_len] predictions, 0.0 at padded positions.
        """
        batch_size, seq_len = continuous_data.shape[:2]

        # Column i of categorical_data goes through embedding table i.
        embedded_categorical = [
            embedding(categorical_data[:, :, i])
            for i, embedding in enumerate(self.embeddings)
        ]

        if embedded_categorical:
            categorical_embedded = torch.cat(embedded_categorical, dim=-1)
        else:
            categorical_embedded = torch.empty(batch_size, seq_len, 0, device=continuous_data.device)

        combined_input = torch.cat([continuous_data, categorical_embedded], dim=-1)

        # Zero-fill padded steps: identical to the convolution's own zero
        # padding, so a padded sequence behaves like an unpadded one.
        combined_input = combined_input.masked_fill(masks.unsqueeze(-1), 0.0)

        # Conv1d expects [batch, features, seq_len].
        conv_input = combined_input.transpose(1, 2)

        conv_outputs = []
        for conv, bn in zip(self.conv_layers, self.batch_norms):
            # Trim to seq_len: an even kernel size produces seq_len + 1 steps.
            conv_out = conv(conv_input)[..., :seq_len]
            conv_outputs.append(F.relu(bn(conv_out)))  # [batch, filters, seq_len]

        # [batch, total_filters, seq_len] -> [batch, seq_len, total_filters]
        combined_conv = torch.cat(conv_outputs, dim=1).transpose(1, 2)

        predictions = self.output_layer(combined_conv).squeeze(-1)

        # Padded positions are reported as 0.
        predictions = predictions.masked_fill(masks, 0.0)

        return predictions

### 모델 팩토리

In [8]:
def create_model(model_config: Dict, vocab_sizes: List[int], continuous_dim: int):
    """Instantiate the model selected by ``model_config["model_type"]``.

    Supported types (case-insensitive): ``rnn``/``lstm``/``gru``,
    ``rnn_attention``/``lstm_attention``/``gru_attention`` and ``cnn1d``.
    Missing settings fall back to the defaults written below.

    Args:
        model_config: Model settings.
        vocab_sizes: Embedding table size of each categorical variable.
        continuous_dim: Number of continuous features per step.

    Returns:
        The constructed model.

    Raises:
        ValueError: If ``model_type`` is not one of the supported names.
    """
    setting = model_config.get
    model_type = setting("model_type", "lstm").lower()

    # Arguments accepted by every architecture.
    shared_kwargs = dict(
        vocab_sizes=vocab_sizes,
        continuous_dim=continuous_dim,
        embedding_dim=setting("embedding_dim", 8),
        dropout=setting("dropout", 0.1),
        padding_value=setting("padding_value", -999999.0),
    )
    # Arguments specific to the recurrent architectures.
    recurrent_kwargs = dict(
        hidden_dim=setting("hidden_dim", 128),
        num_layers=setting("num_layers", 2),
        bidirectional=setting("bidirectional", True),
    )

    if model_type in ("rnn", "lstm", "gru"):
        model = RNNModel(
            rnn_type=model_type.upper(),
            **shared_kwargs,
            **recurrent_kwargs,
        )
    elif model_type in ("rnn_attention", "lstm_attention", "gru_attention"):
        # "lstm_attention" -> "LSTM"
        model = RNNAttentionModel(
            rnn_type=model_type.replace("_attention", "").upper(),
            num_attention_heads=setting("num_attention_heads", 8),
            **shared_kwargs,
            **recurrent_kwargs,
        )
    elif model_type == "cnn1d":
        model = CNN1DModel(
            kernel_sizes=setting("kernel_sizes", [3, 5, 7]),
            num_filters=setting("num_filters", 64),
            **shared_kwargs,
        )
    else:
        raise ValueError(f"Unknown model_type: {model_type}")

    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

    logger.info(f"모델 생성 완료:")
    logger.info(f"  - 모델 타입: {model_type}")
    logger.info(f"  - 총 파라미터 수: {total_params:,}")
    logger.info(f"  - 학습 가능한 파라미터 수: {trainable_params:,}")

    return model

## 🚂 훈련 및 평가 함수들

### 마스크 기반 손실 함수

In [9]:
class MaskedMSELoss(nn.Module):
    """Mean-squared-error loss evaluated only on non-padded positions."""

    def __init__(self, padding_value: float = -999999.0):
        super().__init__()
        # Stored as an attribute only; forward() identifies padding via the
        # mask argument rather than by comparing against this value.
        self.padding_value = padding_value

    def forward(self, predictions, targets, masks):
        """Return the MSE over the positions where ``masks`` is False.

        Args:
            predictions: model outputs, [batch_size, seq_len]
            targets: ground-truth values, [batch_size, seq_len]
            masks: padding mask, [batch_size, seq_len] (True = padding)

        Returns:
            Scalar loss tensor. When every position is padding, a fresh
            zero tensor (with ``requires_grad=True``) is returned instead.
        """
        keep = ~masks

        if keep.sum() != 0:
            # Boolean indexing flattens the kept entries into 1-D tensors.
            kept_predictions = predictions[keep]
            kept_targets = targets[keep]
            return F.mse_loss(kept_predictions, kept_targets)

        # Nothing to score in this batch.
        return torch.tensor(0.0, device=predictions.device, requires_grad=True)


def compute_metrics(predictions, targets, masks, padding_value: float = -999999.0):
    """Compute regression metrics over the non-padded positions.

    Args:
        predictions: tensor of model outputs.
        targets: tensor of ground-truth values, same shape as ``predictions``.
        masks: boolean tensor of the same shape (True = padding).
        padding_value: unused; kept for interface compatibility. Padding is
            identified through ``masks`` only.

    Returns:
        Dict with ``mse``, ``rmse``, ``mae`` and ``mape`` (in percent) as
        plain Python floats, plus ``valid_count`` (number of scored
        positions). All metrics are 0.0 when nothing is valid.
    """
    valid_mask = ~masks

    if valid_mask.sum() == 0:
        return {"mse": 0.0, "rmse": 0.0, "mae": 0.0, "mape": 0.0, "valid_count": 0}

    # Move only the scored entries to host memory.
    valid_predictions = predictions[valid_mask].detach().cpu().numpy()
    valid_targets = targets[valid_mask].detach().cpu().numpy()

    errors = valid_predictions - valid_targets
    abs_errors = np.abs(errors)

    mse = np.mean(errors ** 2)
    rmse = np.sqrt(mse)
    mae = np.mean(abs_errors)

    # MAPE: clamp the denominator so zero targets do not divide by zero.
    epsilon = 1e-8
    safe_targets = np.maximum(np.abs(valid_targets), epsilon)
    mape = np.mean(abs_errors / safe_targets * 100)

    # Convert NumPy scalars to built-in floats so the result can be
    # JSON-serialized and stored in checkpoints without NumPy objects.
    return {
        "mse": float(mse),
        "rmse": float(rmse),
        "mae": float(mae),
        "mape": float(mape),
        "valid_count": len(valid_predictions),
    }

### 훈련 에폭

In [10]:
def train_epoch(model, dataloader, criterion, optimizer, device, epoch):
    """Train the model for one epoch.

    Args:
        model: network called as ``model(continuous, categorical, masks, lengths)``.
        dataloader: yields dict batches with the keys ``continuous_data``,
            ``categorical_data``, ``targets``, ``masks`` and ``sequence_lengths``.
        criterion: loss called as ``criterion(predictions, targets, masks)``.
        optimizer: optimizer stepped once per batch.
        device: device the batch tensors are moved to.
        epoch: epoch number, used only for the progress-bar label.

    Returns:
        ``(avg_loss, metrics)`` where ``avg_loss`` is the mean of the
        per-batch losses and ``metrics`` holds ``mse``, ``rmse``, ``mae``,
        ``mape`` computed over all valid (non-padded) positions seen in the
        epoch, plus the total ``valid_count``.
    """
    model.train()
    total_loss = 0.0
    num_batches = 0

    # Per-batch metrics are means over that batch's valid positions, so they
    # are re-weighted by the batch's valid count to obtain epoch-level values.
    # A plain average over batches would be biased whenever batches contain
    # different numbers of valid positions.
    weighted_keys = ("mse", "mae", "mape")
    weighted_sums = dict.fromkeys(weighted_keys, 0.0)
    total_valid = 0

    pbar = tqdm(
        dataloader,
        total=len(dataloader),
        desc=f"Epoch {epoch} [Train]",
        leave=False
    )

    for batch in pbar:
        continuous_data = batch["continuous_data"].to(device)
        categorical_data = batch["categorical_data"].to(device)
        targets = batch["targets"].to(device)
        masks = batch["masks"].to(device)
        sequence_lengths = batch["sequence_lengths"]

        optimizer.zero_grad()

        # Forward pass
        predictions = model(continuous_data, categorical_data, masks, sequence_lengths)
        loss = criterion(predictions, targets, masks)

        # Backward pass
        loss.backward()
        optimizer.step()

        # Metrics are for reporting only; no gradients needed.
        with torch.no_grad():
            batch_metrics = compute_metrics(predictions, targets, masks)

        loss_value = loss.item()
        total_loss += loss_value
        num_batches += 1

        batch_valid = batch_metrics["valid_count"]
        for key in weighted_keys:
            weighted_sums[key] += float(batch_metrics[key]) * batch_valid
        total_valid += batch_valid

        pbar.set_postfix({
            "Loss": f"{loss_value:.4f}",
            "MAPE": f"{batch_metrics['mape']:.2f}%"
        })

    pbar.close()

    # Guard against an empty dataloader / an epoch with no valid positions.
    avg_loss = total_loss / num_batches if num_batches else 0.0
    total_metrics = {"mse": 0.0, "rmse": 0.0, "mae": 0.0, "mape": 0.0, "valid_count": total_valid}
    if total_valid > 0:
        for key in weighted_keys:
            total_metrics[key] = weighted_sums[key] / total_valid
        # RMSE must be derived from the pooled MSE; averaging batch RMSEs
        # does not give the epoch RMSE.
        total_metrics["rmse"] = total_metrics["mse"] ** 0.5

    return avg_loss, total_metrics


def validate_epoch(model, dataloader, criterion, device, epoch=None):
    """Evaluate the model on a validation set without updating weights.

    Args:
        model: network called as ``model(continuous, categorical, masks, lengths)``.
        dataloader: yields dict batches with the keys ``continuous_data``,
            ``categorical_data``, ``targets``, ``masks`` and ``sequence_lengths``.
        criterion: loss called as ``criterion(predictions, targets, masks)``.
        device: device the batch tensors are moved to.
        epoch: optional epoch number, used only for the progress-bar label.

    Returns:
        ``(avg_loss, metrics)`` where ``avg_loss`` is the mean of the
        per-batch losses and ``metrics`` holds ``mse``, ``rmse``, ``mae``,
        ``mape`` computed over all valid (non-padded) positions, plus the
        total ``valid_count``.
    """
    model.eval()
    total_loss = 0.0
    num_batches = 0

    # Per-batch metrics are means over that batch's valid positions; weight
    # them by the valid count so the result equals the metric over the whole
    # validation set rather than an unweighted mean of batch means.
    weighted_keys = ("mse", "mae", "mape")
    weighted_sums = dict.fromkeys(weighted_keys, 0.0)
    total_valid = 0

    desc = f"Epoch {epoch} [Val]" if epoch is not None else "Validation"
    pbar = tqdm(dataloader, desc=desc, leave=False)

    with torch.no_grad():
        for batch in pbar:
            continuous_data = batch["continuous_data"].to(device)
            categorical_data = batch["categorical_data"].to(device)
            targets = batch["targets"].to(device)
            masks = batch["masks"].to(device)
            sequence_lengths = batch["sequence_lengths"]

            predictions = model(continuous_data, categorical_data, masks, sequence_lengths)
            loss = criterion(predictions, targets, masks)

            batch_metrics = compute_metrics(predictions, targets, masks)

            loss_value = loss.item()
            total_loss += loss_value
            num_batches += 1

            batch_valid = batch_metrics["valid_count"]
            for key in weighted_keys:
                weighted_sums[key] += float(batch_metrics[key]) * batch_valid
            total_valid += batch_valid

            pbar.set_postfix({
                "Loss": f"{loss_value:.4f}",
                "MAPE": f"{batch_metrics['mape']:.2f}%"
            })

    pbar.close()

    # Guard against an empty dataloader / a set with no valid positions.
    avg_loss = total_loss / num_batches if num_batches else 0.0
    total_metrics = {"mse": 0.0, "rmse": 0.0, "mae": 0.0, "mape": 0.0, "valid_count": total_valid}
    if total_valid > 0:
        for key in weighted_keys:
            total_metrics[key] = weighted_sums[key] / total_valid
        # RMSE is the square root of the pooled MSE, not a mean of batch RMSEs.
        total_metrics["rmse"] = total_metrics["mse"] ** 0.5

    return avg_loss, total_metrics

### 메인 훈련 루프

In [11]:
def train_model(model, train_loader, val_loader, training_config, device, save_path):
    """Train with LR scheduling, best-checkpoint saving and early stopping.

    Args:
        model: network to train; moved to ``device`` here.
        train_loader: training batches, passed to ``train_epoch``.
        val_loader: validation batches, passed to ``validate_epoch``.
        training_config: mapping read for ``num_epochs`` (default 100),
            ``learning_rate`` (1e-3), ``patience`` (20, early stopping),
            ``scheduler_patience`` (default ``patience // 2``) and
            ``padding_value`` (-999999.0).
        device: device used for training.
        save_path: file the best checkpoint is written to.

    Returns:
        Dict with ``best_val_loss`` (``inf`` if no epoch ever improved).
    """

    num_epochs = training_config.get("num_epochs", 100)
    learning_rate = training_config.get("learning_rate", 1e-3)
    patience = training_config.get("patience", 20)
    padding_value = training_config.get("padding_value", -999999.0)
    # Optional explicit scheduler patience; falls back to the previous
    # behaviour of half the early-stopping patience.
    scheduler_patience = training_config.get("scheduler_patience", patience // 2)

    # Move the model first so the optimizer is built from on-device parameters.
    model = model.to(device)

    # Loss, optimizer and LR scheduler
    criterion = MaskedMSELoss(padding_value)
    optimizer = AdamW(model.parameters(), lr=learning_rate, weight_decay=0.01)
    # The scheduler's `verbose` flag is deprecated in PyTorch; learning-rate
    # changes are reported through the logger below instead.
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=scheduler_patience)

    best_val_loss = float('inf')
    patience_counter = 0

    logger.info(f"훈련 시작: {num_epochs} 에폭, 학습률 {learning_rate}")

    # Epoch-level progress bar
    epoch_pbar = tqdm(range(1, num_epochs + 1), desc="Training Progress")

    for epoch in epoch_pbar:
        train_loss, train_metrics = train_epoch(
            model, train_loader, criterion, optimizer, device, epoch
        )
        val_loss, val_metrics = validate_epoch(
            model, val_loader, criterion, device, epoch
        )

        lr_before = optimizer.param_groups[0]["lr"]
        scheduler.step(val_loss)
        lr_after = optimizer.param_groups[0]["lr"]
        if lr_after != lr_before:
            logger.info(f"  → Learning rate reduced: {lr_before:.2e} → {lr_after:.2e}")

        logger.info(
            f"Epoch {epoch:3d}: Train Loss={train_loss:.4f}, Val Loss={val_loss:.4f}, "
            f'Train MAPE={train_metrics["mape"]:.2f}%, Val MAPE={val_metrics["mape"]:.2f}%'
        )

        # Save the best model so far
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0

            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'val_loss': val_loss,
                'val_metrics': val_metrics,
                'train_metrics': train_metrics
            }, save_path)

            logger.info(f"  → Best model saved! (Val Loss: {val_loss:.4f})")
        else:
            patience_counter += 1

        # Updated after best-tracking so "Best" and "Patience" reflect this
        # epoch rather than the previous one.
        epoch_pbar.set_postfix({
            "T_Loss": f"{train_loss:.4f}",
            "V_Loss": f"{val_loss:.4f}",
            "V_MAPE": f'{val_metrics["mape"]:.2f}%',
            "Best": f"{best_val_loss:.4f}",
            "Patience": f"{patience_counter}/{patience}"
        })

        # Early stopping
        if patience_counter >= patience:
            logger.info(f"Early stopping at epoch {epoch}")
            break

    epoch_pbar.close()

    if best_val_loss == float('inf'):
        # e.g. zero epochs or a validation loss that was never finite/improving
        logger.warning(f"No checkpoint was written to {save_path}: validation loss never improved")

    return {
        'best_val_loss': best_val_loss
    }


def evaluate_model(model, test_loader, device, model_path):
    """Load a checkpoint and evaluate it on the test set.

    Args:
        model: network whose weights are restored from ``model_path``.
        test_loader: yields dict batches with ``continuous_data``,
            ``categorical_data``, ``targets``, ``masks``, ``sequence_lengths``,
            ``timekey_hrs`` and ``oper_ids_list``.
        device: device used for inference.
        model_path: checkpoint file containing ``model_state_dict``.

    Returns:
        Dict with ``test_loss`` (mean per-batch loss), ``metrics``
        (mse/rmse/mae/mape/valid_count over all scored positions),
        ``predictions`` and ``targets`` (1-D NumPy arrays) and
        ``structured_predictions`` (list of dicts with ``timekey_hr``,
        ``oper_id``, ``predicted``, ``actual``).
    """
    logger.info(f"모델 로드: {model_path}")
    # NOTE(security): the checkpoint written by the training loop stores
    # non-tensor metadata (metric dicts), so full unpickling is requested
    # explicitly instead of relying on the version-dependent default of
    # `weights_only`. Only load checkpoints from trusted sources.
    checkpoint = torch.load(model_path, map_location=device, weights_only=False)
    model.load_state_dict(checkpoint['model_state_dict'])

    model = model.to(device)
    model.eval()

    padding_value = -999999.0
    criterion = MaskedMSELoss(padding_value)

    structured_predictions = []
    total_loss = 0.0
    num_batches = 0

    logger.info("테스트 시작")

    with torch.no_grad():
        for batch in tqdm(test_loader, desc="Testing"):
            continuous_data = batch["continuous_data"].to(device)
            categorical_data = batch["categorical_data"].to(device)
            targets = batch["targets"].to(device)
            masks = batch["masks"].to(device)
            sequence_lengths = batch["sequence_lengths"]

            # Identifiers used to label each prediction
            timekey_hrs = batch["timekey_hrs"]
            oper_ids_list = batch["oper_ids_list"]

            predictions = model(continuous_data, categorical_data, masks, sequence_lengths)
            loss = criterion(predictions, targets, masks)
            total_loss += loss.item()
            num_batches += 1

            # Convert each tensor to nested Python lists once per batch
            # instead of calling .item() for every element.
            pred_rows = predictions.cpu().tolist()
            target_rows = targets.cpu().tolist()
            pad_rows = masks.cpu().tolist()

            for sample_idx, (preds, tgts, pads) in enumerate(zip(pred_rows, target_rows, pad_rows)):
                timekey_hr = timekey_hrs[sample_idx]
                oper_ids = oper_ids_list[sample_idx]

                for seq_idx, is_padding in enumerate(pads):
                    # Skip padded positions before touching oper_ids, which
                    # is only indexed for real (non-padded) steps.
                    if is_padding:
                        continue
                    oper_id = oper_ids[seq_idx]
                    if oper_id is None:
                        continue

                    structured_predictions.append({
                        'timekey_hr': timekey_hr,
                        'oper_id': oper_id,
                        'predicted': preds[seq_idx],
                        'actual': tgts[seq_idx]
                    })

    avg_loss = total_loss / num_batches if num_batches else 0.0

    all_predictions = np.array(
        [row['predicted'] for row in structured_predictions], dtype=float
    )
    all_targets = np.array(
        [row['actual'] for row in structured_predictions], dtype=float
    )

    if all_predictions.size == 0:
        # Avoid NaN metrics (mean of an empty array) when nothing was scored.
        logger.warning("No valid (non-padded) test positions were found; metrics set to 0")
        mse = rmse = mae = mape = 0.0
    else:
        abs_errors = np.abs(all_predictions - all_targets)
        mse = float(np.mean(abs_errors ** 2))
        rmse = float(np.sqrt(mse))
        mae = float(np.mean(abs_errors))

        # MAPE: clamp the denominator so zero targets do not divide by zero.
        epsilon = 1e-8
        safe_targets = np.maximum(np.abs(all_targets), epsilon)
        mape = float(np.mean(abs_errors / safe_targets * 100))

    metrics = {
        "mse": mse,
        "rmse": rmse,
        "mae": mae,
        "mape": mape,
        "valid_count": len(all_predictions)
    }

    logger.info(f"테스트 결과: RMSE={rmse:.4f}, MAE={mae:.4f}, MAPE={mape:.2f}%")
    logger.info(f"구조화된 예측 결과: {len(structured_predictions):,}개")

    return {
        "test_loss": avg_loss,
        "metrics": metrics,
        "predictions": all_predictions,
        "targets": all_targets,
        "structured_predictions": structured_predictions
    }

## 🎯 메인 실행 함수

In [12]:
def main(argv=None):
    """Entry point: build data and model, train and/or evaluate, save results.

    Args:
        argv: optional list of command-line arguments. When ``None`` the
            process arguments (``sys.argv[1:]``) are used; unrecognized
            arguments are ignored so the function also runs inside hosts
            that inject their own flags (e.g. a Jupyter kernel).

    Raises:
        ValueError: in ``eval`` mode when ``--model-path`` is not given.

    Side effects:
        Writes ``<exp_name>.pth`` (train mode), ``<exp_name>_results.json``
        and ``<exp_name>_predictions.csv`` into the configured ``save_dir``.
    """
    import argparse

    parser = argparse.ArgumentParser(description="시계열 시퀀스 모델링")
    parser.add_argument("--config-dir", default="configs", help="설정 파일 디렉토리")
    parser.add_argument("--mode", choices=["train", "eval"], default="train", help="실행 모드")
    parser.add_argument("--model-path", default=None, help="평가용 모델 경로")
    parser.add_argument("--gpu", type=int, default=0, help="GPU 번호")
    parser.add_argument("--exp-name", default=None, help="실험명")

    # parse_args([]) silently discarded every command-line flag. Parsing the
    # known flags and ignoring the rest honours real CLI usage while still
    # tolerating extra arguments supplied by a notebook kernel.
    args, unknown_args = parser.parse_known_args(argv)
    if unknown_args:
        logger.info(f"Ignoring unrecognized arguments: {unknown_args}")

    # Load configuration and fix seeds
    config = load_config(args.config_dir)
    set_random_seeds(42)

    # Experiment name: explicit, or "<model_type>_<timestamp>"
    if args.exp_name:
        exp_name = args.exp_name
    else:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        model_type = config.get("model_type", "lstm")
        exp_name = f"{model_type}_{timestamp}"

    # Output directory
    save_dir = config.get("save_dir", "models")
    os.makedirs(save_dir, exist_ok=True)
    model_save_path = os.path.join(save_dir, f"{exp_name}.pth")

    # Device selection
    device = torch.device(f"cuda:{args.gpu}" if torch.cuda.is_available() else "cpu")
    logger.info(f"Using device: {device}")

    # Data loaders
    logger.info("데이터 로딩 중...")
    train_loader, val_loader, test_loader, categorical_processor = create_dataloaders(config)

    # Model
    vocab_sizes = categorical_processor.get_vocab_sizes()
    continuous_dim = len(config["continuous_columns"])

    model = create_model(config, vocab_sizes, continuous_dim)

    if args.mode == "train":
        logger.info("훈련 시작...")
        train_model(
            model, train_loader, val_loader, config, device, model_save_path
        )

        # Evaluate the best checkpoint written during training.
        logger.info("훈련 완료, 테스트 시작...")
        test_results = evaluate_model(model, test_loader, device, model_save_path)

    else:
        if not args.model_path:
            raise ValueError("--model-path must be provided in eval mode")
        test_results = evaluate_model(model, test_loader, device, args.model_path)

    # Persist a summary of the run
    results = {
        "exp_name": exp_name,
        "config": config,
        "test_metrics": test_results["metrics"],
        "model_info": {
            "total_parameters": sum(p.numel() for p in model.parameters()),
            "model_type": config.get("model_type", "lstm")
        }
    }

    results_path = os.path.join(save_dir, f"{exp_name}_results.json")
    # Explicit encoding keeps the output independent of the platform default.
    with open(results_path, "w", encoding="utf-8") as f:
        # default=str stringifies any config value json cannot encode natively.
        json.dump(results, f, indent=2, default=str)

    predictions_path = os.path.join(save_dir, f"{exp_name}_predictions.csv")
    structured_predictions = test_results.get("structured_predictions")

    if structured_predictions:
        # Per-(timekey_hr, oper_id) predictions with error columns
        structured_df = pd.DataFrame(structured_predictions)
        structured_df["error"] = structured_df["predicted"] - structured_df["actual"]
        structured_df["abs_error"] = structured_df["error"].abs()
        structured_df["abs_percent_error"] = (
            structured_df["abs_error"] / structured_df["actual"].abs().clip(lower=1e-8) * 100
        )

        structured_df.to_csv(predictions_path, index=False)

        logger.info(f"  - 구조화된 예측 결과: {predictions_path}")
        logger.info(f"  - 저장된 예측 개수: {len(structured_df):,}개")
        logger.info(f"  - 고유한 timekey_hr: {structured_df['timekey_hr'].nunique()}개")
        logger.info(f"  - 고유한 oper_id: {structured_df['oper_id'].nunique()}개")

    else:
        # Fallback when no structured records are available: flat arrays only
        actual = test_results["targets"]
        predicted = test_results["predictions"]
        residual = actual - predicted
        abs_error = np.abs(residual)

        predictions_df = pd.DataFrame({
            "actual": actual,
            "predicted": predicted,
            "residual": residual,
            "abs_error": abs_error,
            "abs_percent_error": abs_error / np.maximum(np.abs(actual), 1e-8) * 100
        })

        predictions_df.to_csv(predictions_path, index=False)

        logger.info(f"  - 기본 예측 결과: {predictions_path}")
        logger.info(f"  - 저장된 예측 개수: {len(predictions_df):,}개")

In [13]:
# Run the full pipeline only when this file is executed directly
# (as a script or notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()

2025-08-31 14:25:18,724 - INFO - Using device: cuda:0
2025-08-31 14:25:18,728 - INFO - 데이터 로딩 중...
2025-08-31 14:38:12,837 - INFO - 범주형 변수별 고유값 개수:
2025-08-31 14:38:12,838 - INFO -   oper_group: 277개
2025-08-31 14:38:12,839 - INFO -   days: 7개
2025-08-31 14:38:12,839 - INFO -   shift: 3개
2025-08-31 14:38:12,840 - INFO -   x1: 20개
2025-08-31 14:38:17,881 - INFO - 시퀀스 데이터셋 구성 완료:
2025-08-31 14:38:17,882 - INFO -   - 총 시퀀스 수: 2136
2025-08-31 14:38:17,883 - INFO -   - 최대 시퀀스 길이: 50
2025-08-31 14:38:17,883 - INFO -   - 특성 차원: 52
2025-08-31 14:38:17,884 - INFO -   - 패딩값: -999999.0
2025-08-31 14:38:20,385 - INFO - 시퀀스 데이터셋 구성 완료:
2025-08-31 14:38:20,386 - INFO -   - 총 시퀀스 수: 2136
2025-08-31 14:38:20,386 - INFO -   - 최대 시퀀스 길이: 50
2025-08-31 14:38:20,387 - INFO -   - 특성 차원: 52
2025-08-31 14:38:20,387 - INFO -   - 패딩값: -999999.0
2025-08-31 14:38:23,052 - INFO - 시퀀스 데이터셋 구성 완료:
2025-08-31 14:38:23,053 - INFO -   - 총 시퀀스 수: 2136
2025-08-31 14:38:23,054 - INFO -   - 최대 시퀀스 길이: 50
2025-08-31 14:38:

## 💡 하이퍼파라미터 튜닝 가이드

### 1. 모델별 권장 설정

#### **LSTM/GRU (기본)**
```yaml
model_type: "lstm"
hidden_dim: 128
num_layers: 2
bidirectional: true
dropout: 0.1
```

#### **LSTM + Self-Attention**
```yaml
model_type: "lstm_attention"
hidden_dim: 128
num_layers: 2
num_attention_heads: 8
bidirectional: true
dropout: 0.2
```

#### **CNN 1D**
```yaml
model_type: "cnn1d"
kernel_sizes: [3, 5, 7]
num_filters: 64
dropout: 0.1
```

### 2. 성능 최적화 팁

#### **과적합 방지**
```yaml
dropout: 0.3
patience: 10
learning_rate: 0.0001
```

#### **빠른 수렴**
```yaml
learning_rate: 0.01
scheduler_patience: 5
num_attention_heads: 4  # Attention 모델의 경우
```

#### **메모리 최적화**
```yaml
max_sequence_length: 30
batch_size: 16
num_layers: 1
bidirectional: false
```

## 📝 실행 예시

### 기본 훈련
```bash
python main.py --mode train --gpu 0
```

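### 특정 모델 훈련
모델 종류는 명령행 인자가 아니라 설정 파일의 `model_type` 값으로 결정됩니다. `--exp-name`은 저장되는 결과 파일의 이름만 지정하므로, 아래 명령을 실행하기 전에 설정 파일에서 `model_type`을 먼저 변경하세요.
```bash
# LSTM with Attention (설정 파일에서 model_type: "lstm_attention" 지정 후)
python main.py --mode train --exp-name lstm_attention_exp1

# CNN 1D (설정 파일에서 model_type: "cnn1d" 지정 후)
python main.py --mode train --exp-name cnn1d_exp1

# 설정 변경 후
python main.py --mode train --config-dir ./my_configs
```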

### 모델 평가
```bash
python main.py --mode eval --model-path ./models/lstm_20241201_120000.pth
```

**참고**: 이 프로젝트는 시퀀스 모델링 관점에서 공정 데이터를 처리하며, timekey_hr 내 oper_id 순서를 고려한 sequence-to-sequence 예측을 수행합니다.