# ❗ 5. SKHynix PBL 시계열 시퀀스 모델링 ❗


## 📌 개요

시간대(timekey_hr) 내에서 공정 순서(oper_id)를 고려한 시퀀스 기반 TAT 예측 모델입니다. 동일한 timekey_hr 내의 oper_id들을 순서대로 정렬하여 시퀀스 데이터로 구성하고, 각 oper별 개별 예측(sequence-to-sequence)을 수행합니다.

**데이터 구조**: `[batch_size, sequence_length, feature_dim]`
- **sequence_length**: timekey_hr 내 최대 oper_id 개수 (하이퍼파라미터)
- **feature_dim**: 연속형 변수 개수 + 범주형 변수 개수 × 임베딩 차원

**지원 모델**:
1. **RNN/LSTM/GRU**: 기본 순환 신경망
2. **RNN + Self-Attention**: 순환 신경망에 어텐션 메커니즘 추가
3. **CNN 1D**: 다중 커널 1차원 합성곱 신경망
4. **Transformer Encoder**: Transformer Encoder 기반의 모델

## 🔧 환경 설정 및 라이브러리

In [1]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
import numpy as np
import yaml
import logging
import json
from datetime import datetime
from tqdm import tqdm
from typing import Dict, List, Tuple, Optional, Union

# sklearn
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error

# PyTorch
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence
from torch.optim import Adam, AdamW
from torch.optim.lr_scheduler import ReduceLROnPlateau, StepLR

## 📊 유틸리티 함수들

### 설정 로딩 및 시드 설정

In [2]:
def load_config(config_dir: str = "configs") -> Dict:
    """YAML 설정 파일들을 통합하여 로드"""
    configs = {}
    config_files = ["dataset", "model", "training"]

    for file in config_files:
        config_path = os.path.join(config_dir, f"{file}.yaml")
        with open(config_path, "r", encoding="utf-8") as f:
            config = yaml.safe_load(f)
            configs.update(config)

    return configs


def set_random_seeds(seed: int = 42):
    """재현성을 위한 랜덤 시드 설정"""
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# Global logger 변수
logger = None

def setup_logging(log_file: str = "training.log"):
    """로깅 설정 및 global logger 설정"""
    global logger
    
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s - %(levelname)s - %(message)s",
        handlers=[
            logging.FileHandler(log_file),
            logging.StreamHandler()
        ],
        force=True  # 기존 핸들러 제거 후 새로 설정
    )
    
    logger = logging.getLogger(__name__)
    return logger

### 범주형 데이터 처리기

In [3]:
class CategoricalProcessor:
    """범주형 변수 임베딩을 위한 처리기"""
    
    def __init__(self, embedding_dim: int = 8):
        self.embedding_dim = embedding_dim
        self.label_encoders = {}
        self.vocab_sizes = {}
        self.categorical_columns = []
        
    def fit(self, df: pd.DataFrame, categorical_columns: List[str]):
        """전체 데이터에 대해 범주형 인코더 학습"""
        self.categorical_columns = categorical_columns
        
        for col in categorical_columns:
            unique_values = df[col].astype(str).unique()
            encoder = LabelEncoder()
            encoder.fit(unique_values)
            
            self.label_encoders[col] = encoder
            self.vocab_sizes[col] = len(encoder.classes_)
        
        logger.info(f"범주형 변수별 고유값 개수:")
        for col in categorical_columns:
            logger.info(f"  {col}: {self.vocab_sizes[col]}개")
    
    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """DataFrame의 범주형 컬럼들을 숫자로 변환"""
        df_encoded = df.copy()
        
        for col in self.categorical_columns:
            df_encoded[col] = self.label_encoders[col].transform(
                df_encoded[col].astype(str)
            )
        
        return df_encoded
    
    def get_vocab_sizes(self) -> List[int]:
        """각 범주형 변수의 vocab_size 리스트 반환"""
        return [self.vocab_sizes[col] for col in self.categorical_columns]

## 🗂️ 시퀀스 데이터셋 클래스

### 메인 데이터셋

In [4]:
import os
import re
import torch
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
import numpy as np
import yaml
import logging
from datetime import datetime
from tqdm import tqdm
from typing import Dict, List, Tuple, Optional, Union

# sklearn
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error

# PyTorch
from torch.utils.data import Dataset, DataLoader
from torch.optim import Adam, AdamW
from torch.optim.lr_scheduler import ReduceLROnPlateau, StepLR

def extract_oper_number(oper_id):
    """oper_id에서 숫자 부분 추출 (예: 'oper123' -> 123)"""
    match = re.search(r'\d+', str(oper_id))
    return int(match.group()) if match else 0


class CategoricalProcessor:
    """통합된 범주형 변수 처리기 - 모든 범주형 변수를 하나의 vocabulary로 통합"""
    
    def __init__(self, categorical_columns: List[str], categories: List[int], embedding_dim: int = 8):
        """
        Args:
            categorical_columns: 범주형 컬럼명 리스트 (예: ["oper_group", "days", "shift", "x1"])
            categories: 각 범주형 변수의 카테고리 수 (예: [277, 7, 3, 20])
            embedding_dim: 임베딩 차원
        """
        self.categorical_columns = categorical_columns
        self.categories = categories
        self.embedding_dim = embedding_dim
        
        # 각 변수별 인코더와 오프셋 계산
        self.label_encoders = {}
        self.category_offsets = {}  # 각 변수의 시작 인덱스
        self.category_ranges = {}   # 각 변수의 (start, end) 범위
        
        # 통합 vocabulary 크기 계산
        self.total_vocab_size = sum(categories)
        
        logger.info(f"통합된 범주형 변수 설정:")
        logger.info(f"  - 변수별 카테고리 수: {dict(zip(categorical_columns, categories))}")
        logger.info(f"  - 총 vocabulary 크기: {self.total_vocab_size}")
        
    def fit(self, df: pd.DataFrame):
        """전체 데이터에 대해 통합 범주형 인코더 학습"""
        
        current_offset = 0
        
        for i, col in enumerate(self.categorical_columns):
            # 해당 변수의 고유값들 추출
            unique_values = df[col].astype(str).unique()
            
            # LabelEncoder 학습 (0부터 시작)
            encoder = LabelEncoder()
            encoder.fit(unique_values)
            
            # 실제 고유값 개수 확인
            actual_vocab_size = len(encoder.classes_)
            expected_vocab_size = self.categories[i]
            
            if actual_vocab_size != expected_vocab_size:
                logger.info(f"  경고: {col}의 실제 카테고리 수({actual_vocab_size})가 설정값({expected_vocab_size})과 다릅니다")
                # 실제값으로 업데이트
                self.categories[i] = actual_vocab_size
            
            self.label_encoders[col] = encoder
            self.category_offsets[col] = current_offset
            self.category_ranges[col] = (current_offset, current_offset + actual_vocab_size)
            
            logger.info(f"  - {col}: 인덱스 {current_offset}~{current_offset + actual_vocab_size - 1} ({actual_vocab_size}개)")
            
            current_offset += actual_vocab_size
        
        # 총 vocabulary 크기 재계산
        self.total_vocab_size = current_offset
        logger.info(f"  - 최종 통합 vocabulary 크기: {self.total_vocab_size}")
        
        return self
    
    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """DataFrame의 범주형 컬럼들을 통합 인덱스로 변환"""
        df_encoded = df.copy()
        
        for col in self.categorical_columns:
            # 먼저 LabelEncoder로 0부터 시작하는 인덱스로 변환
            encoded_values = self.label_encoders[col].transform(df_encoded[col].astype(str))
            
            # 오프셋 추가하여 통합 vocabulary 인덱스로 변환
            df_encoded[col] = encoded_values + self.category_offsets[col]
        
        return df_encoded
    
    def get_total_vocab_size(self) -> int:
        """통합된 총 vocabulary 크기 반환"""
        return self.total_vocab_size
    
    def get_category_info(self) -> Dict:
        """범주형 변수 정보 딕셔너리 반환"""
        return {
            'categorical_columns': self.categorical_columns,
            'categories': self.categories,
            'category_offsets': self.category_offsets,
            'category_ranges': self.category_ranges,
            'total_vocab_size': self.total_vocab_size
        }
    
    def decode_categorical_value(self, encoded_value: int) -> Tuple[str, str]:
        """통합 인덱스를 원래 (변수명, 값) 으로 디코딩"""
        for col in self.categorical_columns:
            start, end = self.category_ranges[col]
            if start <= encoded_value < end:
                # 오프셋 제거하여 원래 LabelEncoder 인덱스로 변환
                original_idx = encoded_value - self.category_offsets[col]
                # 원래 값 복원
                original_value = self.label_encoders[col].inverse_transform([original_idx])[0]
                return col, original_value
        
        return "unknown", "unknown"


class SequenceOperDataset(Dataset):
    """시퀀스 기반 oper 데이터셋 - 통합 임베딩 + Window sliding"""
    
    def __init__(
        self,
        df: pd.DataFrame,
        categorical_columns: List[str],
        continuous_columns: List[str],
        categories: List[int],  # 새로운 파라미터
        target_column: str = "y",
        categorical_processor: Optional[CategoricalProcessor] = None,
        window_size: int = 30,
        stride: int = None,
        embedding_dim: int = 8,
        padding_value: float = -9999.0
    ):
        self.df = df.copy()
        self.categorical_columns = categorical_columns
        self.continuous_columns = continuous_columns
        self.categories = categories
        self.target_column = target_column
        self.window_size = window_size
        self.stride = stride if stride is not None else window_size
        self.embedding_dim = embedding_dim
        self.padding_value = padding_value
        
        # 통합 범주형 데이터 처리기 설정
        if categorical_processor is None:
            self.categorical_processor = CategoricalProcessor(
                categorical_columns, categories, embedding_dim
            )
            self.categorical_processor.fit(df)
        else:
            self.categorical_processor = categorical_processor
        
        # 통합 임베딩 레이어 생성 (학습 가능)
        total_vocab_size = self.categorical_processor.get_total_vocab_size()
        self.categorical_embedding = nn.Embedding(
            total_vocab_size, embedding_dim, padding_idx=0
        )
        
        # 데이터 전처리 및 시퀀스 생성
        self._preprocess_data()
        self._create_windowed_sequences()
        
        logger.info(f"시퀀스 데이터셋 구성 완료:")
        logger.info(f"  - 총 샘플 수: {len(self.samples)}")
        logger.info(f"  - Window 크기: {window_size}")
        logger.info(f"  - Stride: {self.stride}")
        logger.info(f"  - 연속형 차원: {len(continuous_columns)}")
        logger.info(f"  - 범주형 변수 수: {len(categorical_columns)}")
        logger.info(f"  - 임베딩 차원: {embedding_dim}")
        logger.info(f"  - 최종 특성 차원: {len(continuous_columns) + len(categorical_columns) * embedding_dim}")
        logger.info(f"  - 패딩값: {padding_value}")
    
    def _preprocess_data(self):
        """데이터 전처리"""
        # timekey_hr에서 날짜(day) 추출
        self.df['date'] = (self.df['timekey_hr'] // 100).astype(int)
        
        # 통합 범주형 데이터 인코딩
        if self.categorical_columns:
            self.df = self.categorical_processor.transform(self.df)
        
        # 최종 특성 차원 계산
        self.continuous_dim = len(self.continuous_columns)
        self.categorical_dim = len(self.categorical_columns) * self.embedding_dim
        self.feature_dim = self.continuous_dim + self.categorical_dim
    
    def _create_windowed_sequences(self):
        """timekey_hr별로 window sliding하여 시퀀스 생성"""
        self.samples = []
        
        # timekey_hr별로 그룹화
        grouped = self.df.groupby('timekey_hr')
        
        for timekey_hr, group in grouped:
            # oper_id 순서로 정렬
            group_sorted = group.iloc[group['oper_id'].map(extract_oper_number).argsort()].reset_index(drop=True)
            
            if len(group_sorted) == 0:
                continue
            
            # Window sliding
            for start_idx in range(0, len(group_sorted), self.stride):
                end_idx = start_idx + self.window_size
                
                # 윈도우 데이터 추출
                window_data = group_sorted.iloc[start_idx:min(end_idx, len(group_sorted))]
                actual_length = len(window_data)
                
                if actual_length == 0:
                    continue
                
                # 연속형 데이터 추출 및 패딩
                if self.continuous_columns:
                    continuous_data = window_data[self.continuous_columns].values
                    if actual_length < self.window_size:
                        padding_rows = self.window_size - actual_length
                        padding_matrix = np.full(
                            (padding_rows, len(self.continuous_columns)), 
                            self.padding_value, 
                            dtype=np.float32
                        )
                        continuous_data = np.vstack([continuous_data, padding_matrix])
                    continuous_data = continuous_data.astype(np.float32)
                else:
                    continuous_data = np.empty((self.window_size, 0), dtype=np.float32)
                
                # 범주형 데이터 추출 및 패딩 (통합 인덱스, 1부터 시작)
                if self.categorical_columns:
                    categorical_data = window_data[self.categorical_columns].values
                    if actual_length < self.window_size:
                        padding_rows = self.window_size - actual_length
                        # 패딩은 0으로 (패딩용 인덱스)
                        padding_matrix = np.zeros(
                            (padding_rows, len(self.categorical_columns)), 
                            dtype=np.int64
                        )
                        categorical_data = np.vstack([categorical_data, padding_matrix])
                    categorical_data = categorical_data.astype(np.int64)
                else:
                    categorical_data = np.empty((self.window_size, 0), dtype=np.int64)
                
                # 타겟 데이터 추출 및 패딩
                target_data = window_data[self.target_column].values
                if actual_length < self.window_size:
                    padding_rows = self.window_size - actual_length
                    padding_targets = np.full(padding_rows, self.padding_value, dtype=np.float32)
                    target_data = np.hstack([target_data, padding_targets])
                target_data = target_data.astype(np.float32)
                
                # 마스크 생성 (True = 패딩된 위치)
                mask = np.zeros(self.window_size, dtype=bool)
                if actual_length < self.window_size:
                    mask[actual_length:] = True
                
                # oper_id 정보 (전체 리스트)
                oper_ids = window_data['oper_id'].values.tolist()
                if actual_length < self.window_size:
                    oper_ids.extend([None] * (self.window_size - actual_length))
                
                sample_info = {
                    'timekey_hr': timekey_hr,
                    'oper_ids_list': oper_ids,
                    'continuous_data': continuous_data,
                    'categorical_data': categorical_data,
                    'target_data': target_data,
                    'mask': mask,
                    'actual_length': actual_length,
                    'first_oper_id': window_data['oper_id'].iloc[0],
                    'last_oper_id': window_data['oper_id'].iloc[-1]
                }
                
                self.samples.append(sample_info)
        
        logger.info(f"Window sliding 결과:")
        logger.info(f"  - 총 샘플 수: {len(self.samples)}")
        if self.samples:
            actual_lengths = [sample['actual_length'] for sample in self.samples]
            logger.info(f"  - 평균 실제 길이: {np.mean(actual_lengths):.1f}")
            logger.info(f"  - 최대 실제 길이: {np.max(actual_lengths)}")
            logger.info(f"  - 최소 실제 길이: {np.min(actual_lengths)}")
            
            timekey_hrs = [sample['timekey_hr'] for sample in self.samples]
            unique_timekey_hrs = len(set(timekey_hrs))
            logger.info(f"  - 고유한 timekey_hr: {unique_timekey_hrs}개")
            logger.info(f"  - timekey_hr당 평균 샘플 수: {len(self.samples)/unique_timekey_hrs:.1f}개")
    
    def __len__(self):
        return len(self.samples)
    
    # def __getitem__(self, idx):
    #     sample = self.samples[idx]
        
    #     # 연속형 데이터
    #     continuous_data = torch.tensor(sample['continuous_data'])  # [window_size, continuous_dim]
        
    #     # 범주형 데이터 → 임베딩 적용 → flatten
    #     categorical_data = torch.tensor(sample['categorical_data'])  # [window_size, num_categorical]
        
    #     # 임베딩 적용: [window_size, num_categorical, embed_dim]
    #     with torch.no_grad():  # 여기서는 gradient 계산 안함 (forward에서 계산)
    #         categorical_embedded = self.categorical_embedding(categorical_data)
        
    #     # Flatten: [window_size, num_categorical * embed_dim]
    #     window_size, num_categorical, embed_dim = categorical_embedded.shape
    #     categorical_flattened = categorical_embedded.view(window_size, num_categorical * embed_dim)
        
    #     # 연속형 + 범주형 결합: [window_size, total_feature_dim]
    #     if continuous_data.numel() > 0:  # 연속형 변수가 있는 경우
    #         combined_features = torch.cat([continuous_data, categorical_flattened], dim=-1)
    #     else:  # 연속형 변수가 없는 경우
    #         combined_features = categorical_flattened
        
    #     return {
    #         'features': combined_features,  # 최종 결합된 특성
    #         'targets': torch.tensor(sample['target_data']),
    #         'masks': torch.tensor(sample['mask']),
    #         'actual_length': sample['actual_length'],
    #         'timekey_hr': sample['timekey_hr'],
    #         'oper_ids_list': sample['oper_ids_list'],
    #         'first_oper_id': sample['first_oper_id'],
    #         'last_oper_id': sample['last_oper_id'],
    #         # 디버깅용 원본 데이터도 포함
    #         'continuous_data': continuous_data,
    #         'categorical_data': categorical_data
    #     }
    def __getitem__(self, idx):
        """디버깅 버전의 __getitem__ 메소드"""
        sample = self.samples[idx]
        

        continuous_data = torch.tensor(sample['continuous_data'])
        categorical_data = torch.tensor(sample['categorical_data'])
        
        # 임베딩 적용 전 범위 체크
        max_index = categorical_data.max()
        vocab_size = self.categorical_embedding.num_embeddings
        
        with torch.no_grad():
            categorical_embedded = self.categorical_embedding(categorical_data)
        
        # Flatten
        window_size, num_categorical, embed_dim = categorical_embedded.shape
        categorical_flattened = categorical_embedded.view(window_size, num_categorical * embed_dim)
        
        if continuous_data.numel() > 0:
            combined_features = torch.cat([continuous_data, categorical_flattened], dim=-1)
        else:
            combined_features = categorical_flattened
        
        targets = torch.tensor(sample['target_data'])

        # # # 디버깅 정보 (처음 몇 개 샘플만)
        # debug_mode = idx < 3
        # if debug_mode:
        #     print(f"\n=== 샘플 {idx} 디버깅 ===")
        #     print(f"timekey_hr: {sample['timekey_hr']}")
        #     print(f"actual_length: {sample['actual_length']}")

        # # 1. 연속형 데이터 확인
        #     print(f"연속형 데이터: {continuous_data.shape}")
        #     print(f"  NaN: {torch.isnan(continuous_data).sum()}")
        #     print(f"  Inf: {torch.isinf(continuous_data).sum()}")
        #     print(f"  범위: [{continuous_data.min():.4f}, {continuous_data.max():.4f}]")
        
        # # 2. 범주형 데이터 확인  
        #     print(f"범주형 데이터: {categorical_data.shape}")
        #     print(f"  값 범위: [{categorical_data.min()}, {categorical_data.max()}]")
        #     print(f"  고유값: {torch.unique(categorical_data).tolist()}")
        
        # # 3. 임베딩 적용 (가장 중요한 부분)
        # try:
        #     if debug_mode:
        #         print(f"임베딩 적용 중...")
        #         print(f"  임베딩 vocab_size: {self.categorical_embedding.num_embeddings}")
        #         print(f"  임베딩 embed_dim: {self.categorical_embedding.embedding_dim}")
                
        #         if max_index >= vocab_size:
        #             print(f"❌ CRITICAL: 임베딩 범위 초과!")
        #             print(f"   최대 인덱스: {max_index}, Vocab 크기: {vocab_size}")
        #             print(f"   범주형 데이터: {categorical_data}")
        #             raise ValueError(f"Embedding index out of range: {max_index} >= {vocab_size}")
                
        #         print(f"  임베딩 결과: {categorical_embedded.shape}")
        #         print(f"  임베딩 NaN: {torch.isnan(categorical_embedded).sum()}")
        #         print(f"  임베딩 Inf: {torch.isinf(categorical_embedded).sum()}")

        #         print(f"  Flatten 후: {categorical_flattened.shape}")
        #         print(f"  Flatten NaN: {torch.isnan(categorical_flattened).sum()}")
            
        # except Exception as e:
        #     print(f"❌ 임베딩 적용 실패 (샘플 {idx}): {e}")
        #     print(f"   categorical_data 상세:")
        #     print(f"   형태: {categorical_data.shape}")
        #     print(f"   타입: {categorical_data.dtype}")  
        #     print(f"   값: {categorical_data}")
        #     raise e
        
        
        # if debug_mode:
            
        #     # 4. 연속형과 범주형 결합
        #     print(f"최종 결합: {combined_features.shape}")
        #     print(f"  결합 NaN: {torch.isnan(combined_features).sum()}")
        #     print(f"  결합 Inf: {torch.isinf(combined_features).sum()}")
        #     if torch.isnan(combined_features).sum() == 0 and torch.isinf(combined_features).sum() == 0:
        #         print(f"  결합 범위: [{combined_features.min():.4f}, {combined_features.max():.4f}]")
        
        #     # 5. 타겟 데이터 확인
        #     print(f"타겟 데이터: {targets.shape}")
        #     print(f"  타겟 NaN: {torch.isnan(targets).sum()}")
        #     print(f"  타겟 범위: [{targets.min():.4f}, {targets.max():.4f}]")
        
        # 최종 반환값에 NaN/Inf가 있는지 체크
        final_nan = (torch.isnan(combined_features).sum() + 
                    torch.isnan(targets).sum() +
                    torch.isinf(combined_features).sum() + 
                    torch.isinf(targets).sum())
        
        if final_nan > 0:
            print(f"❌ CRITICAL: 샘플 {idx}에서 최종 NaN/Inf 발견!")
            print(f"   Features NaN: {torch.isnan(combined_features).sum()}")
            print(f"   Features Inf: {torch.isinf(combined_features).sum()}")  
            print(f"   Targets NaN: {torch.isnan(targets).sum()}")
            print(f"   Targets Inf: {torch.isinf(targets).sum()}")
            
            # 어디서 NaN이 생겼는지 추적
            if torch.isnan(continuous_data).sum() > 0:
                print(f"   → 연속형 데이터에서 NaN 발생")
            if torch.isnan(categorical_flattened).sum() > 0:
                print(f"   → 범주형 임베딩에서 NaN 발생")
        
            print(f"=== 샘플 {idx} 디버깅 완료 ===\n")
        
        return {
            'features': combined_features,
            'targets': targets,
            'masks': torch.tensor(sample['mask']),
            'actual_length': sample['actual_length'],
            'timekey_hr': sample['timekey_hr'],
            'oper_ids_list': sample['oper_ids_list'],
            'first_oper_id': sample['first_oper_id'],
            'last_oper_id': sample['last_oper_id'],
            'continuous_data': continuous_data,
            'categorical_data': categorical_data
        }
    
    def get_embedding_layer(self) -> nn.Embedding:
        """임베딩 레이어 반환 (모델에서 사용)"""
        return self.categorical_embedding
    
    def get_feature_dim(self) -> int:
        """최종 특성 차원 반환"""
        return self.feature_dim


def split_data_by_days(
    df: pd.DataFrame, 
    train_ratio: float = 0.8, 
    val_ratio: float = 0.1, 
    test_ratio: float = 0.1
) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """날짜 기준으로 데이터를 분할"""
    
    # timekey_hr에서 날짜(day) 추출
    df['date'] = (df['timekey_hr'].astype(int) // 100).astype(int)
    
    # 고유한 날짜들을 시간순으로 정렬
    unique_dates = sorted(df['date'].unique())
    total_days = len(unique_dates)
    
    # 날짜 기준으로 분할 인덱스 계산
    train_days = int(total_days * train_ratio)
    val_days = int(total_days * val_ratio)
    
    train_dates = unique_dates[:train_days]
    val_dates = unique_dates[train_days:train_days + val_days]
    test_dates = unique_dates[train_days + val_days:]
    
    # 각 분할에 해당하는 데이터 추출
    train_df = df[df['date'].isin(train_dates)].copy()
    val_df = df[df['date'].isin(val_dates)].copy()
    test_df = df[df['date'].isin(test_dates)].copy()
    
    logger.info(f"날짜 기준 데이터 분할 완료:")
    logger.info(f"  - 총 날짜 수: {total_days}일")
    logger.info(f"  - Train: {len(train_dates)}일 ({len(train_df):,}행)")
    logger.info(f"  - Validation: {len(val_dates)}일 ({len(val_df):,}행)")  
    logger.info(f"  - Test: {len(test_dates)}일 ({len(test_df):,}행)")
    logger.info(f"  - Train 날짜 범위: {min(train_dates)} ~ {max(train_dates)}")
    logger.info(f"  - Val 날짜 범위: {min(val_dates)} ~ {max(val_dates)}")
    logger.info(f"  - Test 날짜 범위: {min(test_dates)} ~ {max(test_dates)}")
    
    return train_df, val_df, test_df


def sequence_collate_fn(batch):
    """시퀀스 배치 collate 함수"""
    
    # 주요 텐서들 추출
    features = torch.stack([sample['features'] for sample in batch])
    targets = torch.stack([sample['targets'] for sample in batch])
    masks = torch.stack([sample['masks'] for sample in batch])
    
    # 메타 정보들
    actual_lengths = [sample['actual_length'] for sample in batch]
    timekey_hrs = [sample['timekey_hr'] for sample in batch]
    oper_ids_lists = [sample['oper_ids_list'] for sample in batch]
    first_oper_ids = [sample['first_oper_id'] for sample in batch]
    last_oper_ids = [sample['last_oper_id'] for sample in batch]
    
    # 디버깅용 원본 데이터
    continuous_data = torch.stack([sample['continuous_data'] for sample in batch])
    categorical_data = torch.stack([sample['categorical_data'] for sample in batch])
    
    return {
        'features': features,  # 최종 결합된 특성 [batch_size, window_size, total_feature_dim]
        'targets': targets,
        'masks': masks,
        'actual_lengths': actual_lengths,
        'timekey_hrs': timekey_hrs,
        'oper_ids_lists': oper_ids_lists,
        'first_oper_ids': first_oper_ids,
        'last_oper_ids': last_oper_ids,
        # 디버깅용
        'continuous_data': continuous_data,
        'categorical_data': categorical_data
    }

def create_dataloaders(dataset_config: Dict) -> Tuple[DataLoader, DataLoader, DataLoader, CategoricalProcessor, nn.Embedding]:
    """데이터로더 생성 - 날짜 기준 분할 + 통합 임베딩 + Window sliding"""
    
    # 데이터 로드
    data_path = dataset_config["file_path"]
    excel = pd.read_excel(data_path, sheet_name=None, header=1)
    sheet_names = dataset_config["sheet_names"]
    
    total_df = pd.concat([excel[sheet_name] for sheet_name in sheet_names])
    
    # 기본 전처리
    if "Unnamed: 0" in total_df.columns:
        total_df.drop(columns="Unnamed: 0", inplace=True)
    
    # y값 결측치 제거
    df = total_df[~total_df[dataset_config["target_column"]].isna()].copy()
    logger.info(f"y값 제거 후: {len(df)}행")

    # ✅ Inf 값 확인 및 처리 추가
    logger.info("\nInf 값 확인:")
    continuous_cols = dataset_config["continuous_columns"]
    inf_found = False

    for col in continuous_cols:
        if col in df.columns:
            inf_count = np.isinf(df[col]).sum()
            if inf_count > 0:
                print(f"  ❌ {col}: {inf_count}개 Inf 값")
                inf_found = True
                
                # Inf 값을 NaN으로 변환 후 적절한 값으로 대체
                df.loc[np.isinf(df[col]), col] = np.nan
                df[col] = df[col].fillna(1e+5)  # 중간값으로 대체
                logger.info(f"     → 아주 큰 값({1e+5})으로 대체")
    
    if not inf_found:
        print("  ✅ 연속형 변수에 Inf 없음")


    # 불필요한 컬럼 제거
    drop_columns = dataset_config.get("additional_drop_columns", [])
    if drop_columns:
        existing_drops = [col for col in drop_columns if col in df.columns]
        if existing_drops:
            df = df.drop(columns=existing_drops)

    
    df.reset_index(drop=True, inplace=True)
    
    logger.info(f"원본 데이터 로드 완료:")
    logger.info(f"  - 총 행 수: {len(df):,}개")
    logger.info(f"  - 고유 timekey_hr: {df['timekey_hr'].nunique()}개")
    
    # 통합 범주형 처리기 생성 및 학습
    categorical_processor = CategoricalProcessor(
        categorical_columns=dataset_config["categorical_columns"],
        categories=dataset_config.get("categories", []),  # 새로운 파라미터
        embedding_dim=dataset_config.get("embedding_dim", 8)
    )
    categorical_processor.fit(df)
    
    # 날짜 기준으로 데이터 분할
    train_df, val_df, test_df = split_data_by_days(
        df,
        train_ratio=dataset_config.get("train_ratio", 0.8),
        val_ratio=dataset_config.get("val_ratio", 0.1),
        test_ratio=dataset_config.get("test_ratio", 0.1)
    )
    
    # Window sliding 파라미터
    window_size = dataset_config.get("window_size", 30)
    stride = dataset_config.get("stride", None)
    padding_value = dataset_config.get("padding_value", -9999.0)
    
    logger.info(f"\nWindow sliding 설정:")
    logger.info(f"  - Window 크기: {window_size}")
    logger.info(f"  - Stride: {stride if stride is not None else window_size} (비겹침)")
    logger.info(f"  - Padding 값: {padding_value}")
    
    # 데이터셋 생성
    train_dataset = SequenceOperDataset(
        df=train_df,
        categorical_columns=dataset_config["categorical_columns"],
        continuous_columns=dataset_config["continuous_columns"],
        categories=dataset_config.get("categories", []),
        target_column=dataset_config["target_column"],
        categorical_processor=categorical_processor,
        window_size=window_size,
        stride=stride,
        embedding_dim=dataset_config.get("embedding_dim", 8),
        padding_value=padding_value
    )
    
    val_dataset = SequenceOperDataset(
        df=val_df,
        categorical_columns=dataset_config["categorical_columns"],
        continuous_columns=dataset_config["continuous_columns"],
        categories=dataset_config.get("categories", []),
        target_column=dataset_config["target_column"],
        categorical_processor=categorical_processor,
        window_size=window_size,
        stride=stride,
        embedding_dim=dataset_config.get("embedding_dim", 8),
        padding_value=padding_value
    )
    
    test_dataset = SequenceOperDataset(
        df=test_df,
        categorical_columns=dataset_config["categorical_columns"],
        continuous_columns=dataset_config["continuous_columns"],
        categories=dataset_config.get("categories", []),
        target_column=dataset_config["target_column"],
        categorical_processor=categorical_processor,
        window_size=window_size,
        stride=stride,
        embedding_dim=dataset_config.get("embedding_dim", 8),
        padding_value=padding_value
    )
    
    # 임베딩 레이어 추출 (모델에서 사용할 용도)
    embedding_layer = train_dataset.get_embedding_layer()
    
    # 데이터로더 생성
    batch_size = dataset_config.get("batch_size", 32)
    num_workers = dataset_config.get("num_workers", 4)
    
    train_loader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        collate_fn=sequence_collate_fn,
        num_workers=num_workers,
        pin_memory=True
    )
    
    val_loader = DataLoader(
        val_dataset,
        batch_size=batch_size,
        shuffle=False,
        collate_fn=sequence_collate_fn,
        num_workers=num_workers,
        pin_memory=True
    )
    
    test_loader = DataLoader(
        test_dataset,
        batch_size=batch_size,
        shuffle=False,
        collate_fn=sequence_collate_fn,
        num_workers=num_workers,
        pin_memory=True
    )
    
    logger.info(f"\n데이터로더 생성 완료:")
    logger.info(f"  - 배치 크기: {batch_size}")
    logger.info(f"  - Train 배치 수: {len(train_loader)}")
    logger.info(f"  - Val 배치 수: {len(val_loader)}")
    logger.info(f"  - Test 배치 수: {len(test_loader)}")
    
    # 특성 차원 정보 출력
    feature_dim = train_dataset.get_feature_dim()
    logger.info(f"  - 최종 특성 차원: {feature_dim}")
    
    return train_loader, val_loader, test_loader, categorical_processor, embedding_layer


## 🏗️ 모델 아키텍처들

### RNN 기본 모델 (models/rnn.py)

In [5]:
class RNNModel(nn.Module):
    """기본 RNN/LSTM/GRU 모델 - 통합 임베딩 데이터셋용"""
    
    def __init__(
        self,
        feature_dim: int,  # 이미 결합된 특성 차원 (연속형 + 범주형 임베딩)
        rnn_type: str = "LSTM",
        hidden_dim: int = 128,
        num_layers: int = 2,
        dropout: float = 0.1,
        bidirectional: bool = True,
        padding_value: float = -9999.0
    ):
        super().__init__()
        
        self.feature_dim = feature_dim
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.bidirectional = bidirectional
        self.padding_value = padding_value
        
        # RNN 레이어 - 입력 차원이 이미 결합된 feature_dim
        if rnn_type.upper() == "LSTM":
            self.rnn = nn.LSTM(
                feature_dim, hidden_dim, num_layers,
                batch_first=True, dropout=dropout if num_layers > 1 else 0,
                bidirectional=bidirectional
            )
        elif rnn_type.upper() == "GRU":
            self.rnn = nn.GRU(
                feature_dim, hidden_dim, num_layers,
                batch_first=True, dropout=dropout if num_layers > 1 else 0,
                bidirectional=bidirectional
            )
        else:  # RNN
            self.rnn = nn.RNN(
                feature_dim, hidden_dim, num_layers,
                batch_first=True, dropout=dropout if num_layers > 1 else 0,
                bidirectional=bidirectional
            )
        
        # 출력 차원 계산
        rnn_output_dim = hidden_dim * (2 if bidirectional else 1)
        
        # 출력 레이어
        self.output_layer = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(rnn_output_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, 1)
        )
    
    def forward(self, features, masks, **kwargs):
        """
        Args:
            features: [batch_size, seq_len, feature_dim] - 이미 결합된 특성 (연속형 + 범주형 임베딩)
            masks: [batch_size, seq_len] (True = 패딩)
            **kwargs: 호환성을 위한 추가 인자들 (무시됨)
        
        Returns:
            predictions: [batch_size, seq_len]
        """
        batch_size, seq_len = features.shape[:2]
        
        # 패딩된 위치를 마스킹
        masked_features = features.masked_fill(
            masks.unsqueeze(-1), self.padding_value
        )
        
        # RNN forward
        rnn_output, _ = self.rnn(masked_features)
        
        # 출력 레이어
        predictions = self.output_layer(rnn_output).squeeze(-1)
        
        # 패딩된 위치는 0으로 마스킹
        predictions = predictions.masked_fill(masks, 0.0)
        
        return predictions

### RNN + Self-Attention 모델 (models/attention.py)

In [6]:
class SelfAttention(nn.Module):
    """Self-Attention 메커니즘"""
    
    def __init__(self, hidden_dim: int, num_heads: int = 8, dropout: float = 0.1):
        super().__init__()
        
        self.hidden_dim = hidden_dim
        self.num_heads = num_heads
        self.head_dim = hidden_dim // num_heads
        
        assert hidden_dim % num_heads == 0, "hidden_dim must be divisible by num_heads"
        
        self.query = nn.Linear(hidden_dim, hidden_dim)
        self.key = nn.Linear(hidden_dim, hidden_dim)
        self.value = nn.Linear(hidden_dim, hidden_dim)
        
        self.dropout = nn.Dropout(dropout)
        self.layer_norm = nn.LayerNorm(hidden_dim)
        
    def forward(self, x, mask=None):
        """
        Args:
            x: [batch_size, seq_len, hidden_dim]
            mask: [batch_size, seq_len] (True = 패딩)
        """
        batch_size, seq_len, _ = x.shape
        
        # Multi-head attention
        Q = self.query(x).view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2)
        K = self.key(x).view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2)
        V = self.value(x).view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2)
        
        # Attention scores
        attention_scores = torch.matmul(Q, K.transpose(-2, -1)) / np.sqrt(self.head_dim)
        
        # Apply mask
        if mask is not None:
            attention_mask = mask.unsqueeze(1).unsqueeze(1)  # [batch, 1, 1, seq_len]
            attention_scores = attention_scores.masked_fill(attention_mask, float('-inf'))
        
        # Softmax
        attention_weights = F.softmax(attention_scores, dim=-1)
        attention_weights = self.dropout(attention_weights)
        
        # Apply attention
        attended = torch.matmul(attention_weights, V)
        attended = attended.transpose(1, 2).contiguous().view(batch_size, seq_len, self.hidden_dim)
        
        # Residual connection + Layer norm
        output = self.layer_norm(x + attended)
        
        return output


class RNNAttentionModel(nn.Module):
    """RNN + Self-Attention 모델 - 통합 임베딩 데이터셋용"""
    
    def __init__(
        self,
        feature_dim: int,  # 이미 결합된 특성 차원 (연속형 + 범주형 임베딩)
        rnn_type: str = "LSTM", 
        hidden_dim: int = 128,
        num_layers: int = 2,
        num_attention_heads: int = 8,
        dropout: float = 0.1,
        bidirectional: bool = True,
        padding_value: float = -9999.0
    ):
        super().__init__()
        
        self.feature_dim = feature_dim
        self.hidden_dim = hidden_dim
        self.padding_value = padding_value
        
        # RNN 레이어 - 입력 차원이 이미 결합된 feature_dim
        if rnn_type.upper() == "LSTM":
            self.rnn = nn.LSTM(
                feature_dim, hidden_dim, num_layers,
                batch_first=True, dropout=dropout if num_layers > 1 else 0,
                bidirectional=bidirectional
            )
        elif rnn_type.upper() == "GRU":
            self.rnn = nn.GRU(
                feature_dim, hidden_dim, num_layers,
                batch_first=True, dropout=dropout if num_layers > 1 else 0,
                bidirectional=bidirectional
            )
        else:  # RNN
            self.rnn = nn.RNN(
                feature_dim, hidden_dim, num_layers,
                batch_first=True, dropout=dropout if num_layers > 1 else 0,
                bidirectional=bidirectional
            )
        
        # RNN 출력 차원
        rnn_output_dim = hidden_dim * (2 if bidirectional else 1)
        
        # RNN 출력을 어텐션 입력 차원으로 변환
        self.rnn_projection = nn.Linear(rnn_output_dim, hidden_dim)
        
        # Self-Attention
        self.self_attention = SelfAttention(
            hidden_dim, num_attention_heads, dropout
        )
        
        # 출력 레이어
        self.output_layer = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, 1)
        )
    
    def forward(self, features, masks, **kwargs):
        """
        Args:
            features: [batch_size, seq_len, feature_dim] - 이미 결합된 특성 (연속형 + 범주형 임베딩)
            masks: [batch_size, seq_len] (True = 패딩)
            **kwargs: 호환성을 위한 추가 인자들 (무시됨)
        
        Returns:
            predictions: [batch_size, seq_len]
        """
        batch_size, seq_len = features.shape[:2]
        
        # 패딩된 위치를 마스킹
        masked_features = features.masked_fill(
            masks.unsqueeze(-1), self.padding_value
        )
        
        # RNN forward
        rnn_output, _ = self.rnn(masked_features)
        
        # RNN 출력 차원 변환
        projected_output = self.rnn_projection(rnn_output)
        
        # Self-Attention 적용
        attended_output = self.self_attention(projected_output, masks)
        
        # 출력 레이어
        predictions = self.output_layer(attended_output).squeeze(-1)
        
        # 패딩된 위치는 0으로 마스킹
        predictions = predictions.masked_fill(masks, 0.0)
        
        return predictions

### CNN 1D 모델 (models/cnn.py)

In [7]:
class CNN1DModel(nn.Module):
    """1D CNN 모델 (다중 커널) - 통합 임베딩 데이터셋용"""
    
    def __init__(
        self,
        feature_dim: int,  # 이미 결합된 특성 차원 (연속형 + 범주형 임베딩)
        kernel_sizes: List[int] = [3, 5, 7],
        num_filters: int = 64,
        dropout: float = 0.1,
        padding_value: float = -9999.0
    ):
        super().__init__()
        
        self.feature_dim = feature_dim
        self.kernel_sizes = kernel_sizes
        self.num_filters = num_filters
        self.padding_value = padding_value
        
        # 다중 커널 1D Conv 레이어들 - 입력 차원이 이미 결합된 feature_dim
        self.conv_layers = nn.ModuleList([
            nn.Conv1d(feature_dim, num_filters, kernel_size, padding=kernel_size//2)
            for kernel_size in kernel_sizes
        ])
        
        # Batch normalization
        self.batch_norms = nn.ModuleList([
            nn.BatchNorm1d(num_filters) for _ in kernel_sizes
        ])
        
        # 출력 레이어
        total_filters = len(kernel_sizes) * num_filters
        self.output_layer = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(total_filters, total_filters // 2),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(total_filters // 2, 1)
        )
        
        self.dropout = nn.Dropout(dropout)
    
    def forward(self, features, masks, **kwargs):
        """
        Args:
            features: [batch_size, seq_len, feature_dim] - 이미 결합된 특성 (연속형 + 범주형 임베딩)
            masks: [batch_size, seq_len] (True = 패딩)
            **kwargs: 호환성을 위한 추가 인자들 (무시됨)
        
        Returns:
            predictions: [batch_size, seq_len]
        """
        batch_size, seq_len = features.shape[:2]
        
        # 패딩된 위치를 마스킹
        masked_features = features.masked_fill(
            masks.unsqueeze(-1), self.padding_value
        )
        
        # Conv1d를 위해 차원 변환: [batch, seq_len, features] -> [batch, features, seq_len]
        conv_input = masked_features.transpose(1, 2)
        
        # 다중 커널 Conv1D 적용
        conv_outputs = []
        for conv, bn in zip(self.conv_layers, self.batch_norms):
            conv_out = F.relu(bn(conv(conv_input)))  # [batch, filters, seq_len]
            conv_outputs.append(conv_out)
        
        # 모든 커널 출력 결합
        combined_conv = torch.cat(conv_outputs, dim=1)  # [batch, total_filters, seq_len]
        
        # 다시 원래 차원으로: [batch, total_filters, seq_len] -> [batch, seq_len, total_filters]
        combined_conv = combined_conv.transpose(1, 2)
        
        # 출력 레이어
        predictions = self.output_layer(combined_conv).squeeze(-1)
        
        # 패딩된 위치는 0으로 마스킹
        predictions = predictions.masked_fill(masks, 0.0)
        
        return predictions

### Transformer(Encoder) 모델(models/transformer.py)

In [8]:
class PositionalEncoding(nn.Module):
    """사인/코사인 위치 인코딩"""
    
    def __init__(self, d_model: int, max_len: int = 5000, dropout: float = 0.1):
        super().__init__()
        self.dropout = nn.Dropout(dropout)
        
        # 위치 인코딩 생성
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * 
                           (-np.log(10000.0) / d_model))
        
        pe[:, 0::2] = torch.sin(position * div_term)  # 짝수 인덱스
        pe[:, 1::2] = torch.cos(position * div_term)  # 홀수 인덱스
        
        pe = pe.unsqueeze(0).transpose(0, 1)  # [max_len, 1, d_model]
        self.register_buffer('pe', pe)
    
    def forward(self, x):
        """
        Args:
            x: [batch_size, seq_len, d_model]
        """
        seq_len = x.size(1)
        x = x + self.pe[:seq_len, :, :].transpose(0, 1)  # [batch_size, seq_len, d_model]
        return self.dropout(x)


class TransformerModel(nn.Module):
    """Point-wise Transformer 모델 (인코더만 사용) - 통합 임베딩 데이터셋용"""
    
    def __init__(
        self,
        feature_dim: int,  # 입력 특성 차원 (연속형 + 범주형 임베딩 flatten 결과)
        d_model: int = 256,
        num_heads: int = 8,
        num_layers: int = 6,
        dim_feedforward: int = 1024,
        dropout: float = 0.1,
        activation: str = "relu",
        use_positional_encoding: bool = True,
        padding_value: float = -9999.0
    ):
        super().__init__()
        
        self.d_model = d_model
        self.padding_value = padding_value
        self.use_positional_encoding = use_positional_encoding
        
        # 입력 특성을 d_model 차원으로 projection
        self.input_projection = nn.Linear(feature_dim, d_model)
        
        # 위치 인코딩 (선택적)
        if use_positional_encoding:
            self.positional_encoding = PositionalEncoding(d_model, dropout=dropout)
        
        # Transformer 인코더 레이어
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=num_heads,
            dim_feedforward=dim_feedforward,
            dropout=dropout,
            activation=activation,
            batch_first=True,  # [batch_size, seq_len, d_model]
            norm_first=False   # Post-norm (표준)
        )
        
        self.transformer_encoder = nn.TransformerEncoder(
            encoder_layer,
            num_layers=num_layers
        )
        
        # Point-wise 출력 레이어 (각 위치별로 독립적 예측)
        self.output_projection = nn.Sequential(
            nn.Linear(d_model, d_model // 2),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(d_model // 2, 1)
        )
        
        # 파라미터 초기화
        self._init_parameters()
    
    def _init_parameters(self):
        """Xavier uniform 초기화"""
        for name, p in self.named_parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)
    
    def forward(self, features, masks, **kwargs):
        """
        Args:
            features: [batch_size, seq_len, feature_dim] - 이미 결합된 특성 (연속형 + 범주형 임베딩)
            masks: [batch_size, seq_len] (True = 패딩 위치)
            **kwargs: 호환성을 위한 추가 인자들 (무시됨)
        
        Returns:
            predictions: [batch_size, seq_len] - 각 위치별 예측값
        """
        batch_size, seq_len, _ = features.shape
        
        # 입력 특성을 Transformer 차원으로 projection
        x = self.input_projection(features)  # [batch_size, seq_len, d_model]
        
        # 위치 인코딩 추가 (선택적)
        if self.use_positional_encoding:
            x = self.positional_encoding(x)
        
        # Transformer 인코더 적용
        # src_key_padding_mask: True인 위치는 attention에서 무시
        transformer_output = self.transformer_encoder(
            x, 
            src_key_padding_mask=masks
        )  # [batch_size, seq_len, d_model]
        
        # Point-wise 예측 (각 위치별로 독립적)
        predictions = self.output_projection(transformer_output).squeeze(-1)  # [batch_size, seq_len]
        
        # 패딩된 위치는 0으로 마스킹
        predictions = predictions.masked_fill(masks, 0.0)
        
        return predictions

### 모델 팩토리

In [9]:
def create_model(model_config: Dict, feature_dim: int):
    """설정에 따른 모델 생성 - 통합 임베딩 데이터셋용 (모든 모델 타입 지원)"""
    
    model_type = model_config.get("model_type", "lstm").lower()
    dropout = model_config.get("dropout", 0.1)
    padding_value = model_config.get("padding_value", -9999.0)
    hidden_dim = model_config.get("hidden_dim", 128)
    num_layers = model_config.get("num_layers", 2)
    bidirectional = model_config.get("bidirectional", True)
    
    logger.info(f"모델 생성 중:")
    logger.info(f"  - 모델 타입: {model_type}")
    logger.info(f"  - 입력 특성 차원: {feature_dim}")
    logger.info(f"  - 히든 차원: {hidden_dim}")
    logger.info(f"  - 레이어 수: {num_layers}")
    logger.info(f"  - 드롭아웃: {dropout}")
    
    if model_type == "transformer":
        # Transformer 모델
        d_model = model_config.get("d_model", 256)
        num_heads = model_config.get("num_heads", 8)
        dim_feedforward = model_config.get("dim_feedforward", 1024)
        activation = model_config.get("activation", "relu")
        use_positional_encoding = model_config.get("use_positional_encoding", True)
        
        model = TransformerModel(
            feature_dim=feature_dim,
            d_model=d_model,
            num_heads=num_heads,
            num_layers=num_layers,
            dim_feedforward=dim_feedforward,
            dropout=dropout,
            activation=activation,
            use_positional_encoding=use_positional_encoding,
            padding_value=padding_value
        )
        
        logger.info(f"  - d_model: {d_model}")
        logger.info(f"  - num_heads: {num_heads}")
        logger.info(f"  - dim_feedforward: {dim_feedforward}")
        logger.info(f"  - 위치 인코딩: {'사용' if use_positional_encoding else '미사용'}")
        
    elif model_type in ["rnn", "lstm", "gru"]:
        # 기본 RNN/LSTM/GRU 모델
        model = RNNModel(
            feature_dim=feature_dim,
            rnn_type=model_type.upper(),
            hidden_dim=hidden_dim,
            num_layers=num_layers,
            dropout=dropout,
            bidirectional=bidirectional,
            padding_value=padding_value
        )
        
        logger.info(f"  - RNN 타입: {model_type.upper()}")
        logger.info(f"  - 양방향: {'Yes' if bidirectional else 'No'}")
        
    elif model_type in ["rnn_attention", "lstm_attention", "gru_attention"]:
        # RNN + Self-Attention 모델
        rnn_type = model_type.replace("_attention", "").upper()
        num_attention_heads = model_config.get("num_attention_heads", 8)
        
        model = RNNAttentionModel(
            feature_dim=feature_dim,
            rnn_type=rnn_type,
            hidden_dim=hidden_dim,
            num_layers=num_layers,
            num_attention_heads=num_attention_heads,
            dropout=dropout,
            bidirectional=bidirectional,
            padding_value=padding_value
        )
        
        logger.info(f"  - RNN 타입: {rnn_type}")
        logger.info(f"  - 양방향: {'Yes' if bidirectional else 'No'}")
        logger.info(f"  - Attention heads: {num_attention_heads}")
        
    elif model_type == "cnn1d":
        # CNN 1D 모델
        kernel_sizes = model_config.get("kernel_sizes", [3, 5, 7])
        num_filters = model_config.get("num_filters", 64)
        
        model = CNN1DModel(
            feature_dim=feature_dim,
            kernel_sizes=kernel_sizes,
            num_filters=num_filters,
            dropout=dropout,
            padding_value=padding_value
        )
        
        logger.info(f"  - 커널 크기들: {kernel_sizes}")
        logger.info(f"  - 필터 수: {num_filters}")
        
    else:
        raise ValueError(f"Unknown model_type: {model_type}")
    
    # 파라미터 수 계산 및 출력
    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    
    logger.info(f"  - 총 파라미터 수: {total_params:,}")
    logger.info(f"  - 학습 가능한 파라미터 수: {trainable_params:,}")
    logger.info(f"모델 생성 완료!")
    
    return model


def create_model_from_config_and_dataloader(model_config: Dict, train_loader: DataLoader) -> nn.Module:
    """데이터로더에서 특성 차원을 추출하여 모델 생성하는 헬퍼 함수"""
    
    # 첫 번째 배치에서 특성 차원 추출
    sample_batch = next(iter(train_loader))
    feature_dim = sample_batch['features'].shape[-1]  # [batch_size, seq_len, feature_dim]
    
    logger.info(f"데이터로더에서 추출한 특성 차원: {feature_dim}")
    
    return create_model(model_config, feature_dim)

## 🚂 훈련 및 평가 함수들

### 마스크 기반 손실 함수

In [10]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import logging
from tqdm import tqdm
from torch.optim import AdamW
from torch.optim.lr_scheduler import ReduceLROnPlateau


class MaskedMSELoss(nn.Module):
    """패딩을 고려한 안전한 MSE Loss - NaN 방지"""
    
    def __init__(self, padding_value: float = -9999.0):
        super().__init__()
        self.padding_value = padding_value
    
    def forward(self, predictions, targets, masks):
        """
        Args:
            predictions: [batch_size, seq_len]
            targets: [batch_size, seq_len]  
            masks: [batch_size, seq_len] (True = 패딩)
        """
        # 패딩되지 않은 위치만 선택
        valid_mask = ~masks
        
        if valid_mask.sum() == 0:
            print("⚠️ 경고: 유효한 데이터가 없습니다!")
            return torch.tensor(0.0, device=predictions.device, requires_grad=True)
        
        valid_predictions = predictions[valid_mask]
        valid_targets = targets[valid_mask]
        
        # NaN/Inf 체크 및 제거
        finite_mask = torch.isfinite(valid_predictions) & torch.isfinite(valid_targets)
        
        if finite_mask.sum() == 0:
            print("⚠️ 경고: finite한 값이 없습니다!")
            return torch.tensor(1e6, device=predictions.device, requires_grad=True)  # 큰 손실값 반환
        
        valid_predictions = valid_predictions[finite_mask]
        valid_targets = valid_targets[finite_mask]
        
        # MSE 계산
        mse = F.mse_loss(valid_predictions, valid_targets)
        
        # NaN 체크
        if torch.isnan(mse) or torch.isinf(mse):
            print(f"⚠️ 경고: MSE가 {mse.item()}입니다!")
            return torch.tensor(1e6, device=predictions.device, requires_grad=True)
        
        return mse


def compute_metrics(predictions, targets, masks, padding_value: float = -9999.0):
    """패딩을 고려한 메트릭 계산"""
    valid_mask = ~masks
    
    if valid_mask.sum() == 0:
        return {"mse": 0.0, "rmse": 0.0, "mae": 0.0, "mape": 0.0, "valid_count": 0}
    
    valid_predictions = predictions[valid_mask]
    valid_targets = targets[valid_mask]
    
    # CPU로 변환
    valid_predictions = valid_predictions.detach().cpu().numpy()
    valid_targets = valid_targets.detach().cpu().numpy()
    
    mse = np.mean((valid_predictions - valid_targets) ** 2)
    rmse = np.sqrt(mse)
    mae = np.mean(np.abs(valid_predictions - valid_targets))
    
    # MAPE 계산 (0으로 나누기 방지)
    epsilon = 1e-8
    abs_targets = np.abs(valid_targets)
    abs_errors = np.abs(valid_predictions - valid_targets)
    safe_targets = np.maximum(abs_targets, epsilon)
    mape = np.mean(abs_errors / safe_targets * 100)
    
    return {
        "mse": mse,
        "rmse": rmse, 
        "mae": mae,
        "mape": mape,
        "valid_count": len(valid_predictions)
    }




### 훈련 에폭

In [11]:
def train_epoch(model, dataloader, criterion, optimizer, device, epoch):
    """한 에폭 훈련 - 통합 임베딩 데이터셋용"""
    model.train()
    total_loss = 0.0
    total_metrics = {"mse": 0.0, "rmse": 0.0, "mae": 0.0, "mape": 0.0, "valid_count": 0}
    
    pbar = tqdm(
        enumerate(dataloader), 
        total=len(dataloader),
        desc=f"Epoch {epoch} [Train]",
        leave=False
    )
    
    for batch_idx, batch in pbar:
        # 새로운 데이터셋 구조에 맞는 키 사용
        features = batch["features"].to(device)  # 이미 결합된 특성
        targets = batch["targets"].to(device)
        masks = batch["masks"].to(device)
        
        optimizer.zero_grad()
        
        # 모든 모델이 동일한 인터페이스 사용
        predictions = model(features, masks)
        loss = criterion(predictions, targets, masks)
        
        # Backward pass
        loss.backward()
        optimizer.step()
        
        # 메트릭 계산
        with torch.no_grad():
            batch_metrics = compute_metrics(predictions, targets, masks)
        
        total_loss += loss.item()
        for key in ["mse", "rmse", "mae", "mape"]:
            total_metrics[key] += batch_metrics[key]
        total_metrics["valid_count"] += batch_metrics["valid_count"]
        
        # 진행바 업데이트
        pbar.set_postfix({
            "Loss": f"{loss.item():.4f}",
            "MAPE": f"{batch_metrics['mape']:.2f}%"
        })
    
    pbar.close()
    
    # 평균 계산
    avg_loss = total_loss / len(dataloader)
    for key in ["mse", "rmse", "mae", "mape"]:
        total_metrics[key] = total_metrics[key] / len(dataloader)
    
    return avg_loss, total_metrics


def validate_epoch(model, dataloader, criterion, device, epoch=None):
    """검증 에폭 - 통합 임베딩 데이터셋용"""
    model.eval()
    total_loss = 0
    total_metrics = {"mse": 0, "rmse": 0, "mae": 0, "mape": 0, "valid_count": 0}
    
    desc = f"Epoch {epoch} [Val]" if epoch is not None else "Validation"
    pbar = tqdm(dataloader, desc=desc, leave=False)
    
    with torch.no_grad():
        for batch in pbar:
            # 새로운 데이터셋 구조에 맞는 키 사용
            features = batch["features"].to(device)
            targets = batch["targets"].to(device)
            masks = batch["masks"].to(device)
            
            # 모든 모델이 동일한 인터페이스 사용
            predictions = model(features, masks)
            loss = criterion(predictions, targets, masks)
            
            batch_metrics = compute_metrics(predictions, targets, masks)
            
            total_loss += loss.item()
            for key in ["mse", "rmse", "mae", "mape"]:
                total_metrics[key] += batch_metrics[key]
            total_metrics["valid_count"] += batch_metrics["valid_count"]
            
            pbar.set_postfix({
                "Loss": f"{loss.item():.4f}",
                "MAPE": f"{batch_metrics['mape']:.2f}%"
            })
    
    pbar.close()
    
    avg_loss = total_loss / len(dataloader)
    for key in ["mse", "rmse", "mae", "mape"]:
        total_metrics[key] = total_metrics[key] / len(dataloader)
    
    return avg_loss, total_metrics

### 메인 훈련 루프

In [12]:
def train_model(model, train_loader, val_loader, training_config, device, save_path):
    """메인 훈련 루프 - 통합 임베딩 데이터셋용 (검증 간격 설정 가능)"""
    
    num_epochs = training_config.get("num_epochs", 100)
    learning_rate = training_config.get("learning_rate", 1e-3)
    patience = training_config.get("patience", 20)
    padding_value = training_config.get("padding_value", -9999.0)
    
    # 검증 간격 설정 (새로 추가)
    val_interval = training_config.get("val_interval", 1)  # 기본값: 매 에폭마다 검증
    log_interval = training_config.get("log_interval", 1)  # 기본값: 매 에폭마다 로깅
    
    # 손실 함수 및 옵티마이저
    criterion = MaskedMSELoss(padding_value)
    optimizer = AdamW(model.parameters(), lr=learning_rate, weight_decay=0.01)
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=patience//2, verbose=True)
    
    model = model.to(device)
    
    best_val_loss = float('inf')
    patience_counter = 0
    last_val_loss = None
    last_val_metrics = None
    
    logger.info(f"훈련 시작: {num_epochs} 에폭, 학습률 {learning_rate}")
    logger.info(f"모델 타입: {training_config.get('model_type', 'unknown')}")
    logger.info(f"검증 간격: {val_interval} 에폭마다")
    logger.info(f"로깅 간격: {log_interval} 에폭마다")
    
    # 에폭 진행바
    epoch_pbar = tqdm(range(1, num_epochs + 1), desc="Training Progress")
    
    for epoch in epoch_pbar:
        # 훈련은 매 에폭마다 실시
        train_loss, train_metrics = train_epoch(
            model, train_loader, criterion, optimizer, device, epoch
        )
        
        # 검증은 지정된 간격마다 또는 마지막 에폭에서 실시
        should_validate = (epoch % val_interval == 0) or (epoch == num_epochs)
        
        if should_validate:
            val_loss, val_metrics = validate_epoch(
                model, val_loader, criterion, device, epoch
            )
            last_val_loss = val_loss
            last_val_metrics = val_metrics
            
            # 스케줄러 업데이트 (검증이 실시된 경우만)
            scheduler.step(val_loss)
        else:
            # 검증하지 않는 에폭에서는 이전 검증 결과 사용
            val_loss = last_val_loss if last_val_loss is not None else float('inf')
            val_metrics = last_val_metrics if last_val_metrics is not None else {
                "mse": 0.0, "rmse": 0.0, "mae": 0.0, "mape": 0.0, "valid_count": 0
            }
        
        # 로깅 (지정된 간격마다 또는 검증이 실시된 경우)
        should_log = (epoch % log_interval == 0) or should_validate or (epoch == num_epochs)
        
        if should_log:
            if should_validate:
                logger.info(
                    f"Epoch {epoch:3d}: Train Loss={train_loss:.4f}, Val Loss={val_loss:.4f}, "
                    f'Train MAPE={train_metrics["mape"]:.2f}%, Val MAPE={val_metrics["mape"]:.2f}%'
                )
            else:
                logger.info(
                    f"Epoch {epoch:3d}: Train Loss={train_loss:.4f}, "
                    f'Train MAPE={train_metrics["mape"]:.2f}% (검증 생략)'
                )
        
        # 진행바 업데이트
        val_status = "검증됨" if should_validate else "이전값"
        epoch_pbar.set_postfix({
            "T_Loss": f"{train_loss:.4f}",
            "V_Loss": f"{val_loss:.4f}({val_status})",
            "V_MAPE": f'{val_metrics["mape"]:.2f}%',
            "Best": f"{best_val_loss:.4f}",
            "Patience": f"{patience_counter}/{patience}"
        })
        
        # 최고 모델 저장 (검증이 실시된 경우만)
        if should_validate and val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
            
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'val_loss': val_loss,
                'val_metrics': val_metrics,
                'train_metrics': train_metrics,
                'model_type': training_config.get('model_type', 'unknown'),
                'feature_dim': training_config.get('feature_dim', None),
                'config': training_config
            }, save_path)
            
            logger.info(f"  → Best model saved! (Val Loss: {val_loss:.4f})")
        elif should_validate:
            # 검증은 했지만 성능이 개선되지 않은 경우
            patience_counter += 1
        # 검증하지 않은 경우 patience_counter는 증가시키지 않음
        
        # 조기 종료 (검증이 실시된 경우만 확인)
        if should_validate and patience_counter >= patience:
            logger.info(f"Early stopping at epoch {epoch}")
            break
    
    epoch_pbar.close()
    
    # 마지막에 한번 더 검증 (마지막 에폭에서 검증하지 않았다면)
    if not should_validate:
        logger.info("최종 검증 실시 중...")
        final_val_loss, final_val_metrics = validate_epoch(
            model, val_loader, criterion, device
        )
        logger.info(f"최종 검증 결과: Val Loss={final_val_loss:.4f}, Val MAPE={final_val_metrics['mape']:.2f}%")
    
    return {
        'best_val_loss': best_val_loss,
        'total_epochs': epoch,
        'early_stopped': patience_counter >= patience,
        'validation_count': len([e for e in range(1, epoch + 1) if e % val_interval == 0 or e == epoch])
    }

def evaluate_model(model, test_loader, device, model_path, config):
    """모델 평가 - 통합 임베딩 데이터셋용 (구조 정보 포함)"""
    logger.info(f"모델 로드: {model_path}")
    checkpoint = torch.load(model_path, map_location=device, weights_only=False)
    model.load_state_dict(checkpoint['model_state_dict'])
    
    model = model.to(device)
    model.eval()
    
    padding_value = config.get("padding_value", -9999.0)
    criterion = MaskedMSELoss(padding_value)
    
    # 구조화된 결과를 위한 리스트들
    structured_predictions = []
    all_predictions = []
    all_targets = []
    total_loss = 0.0
    
    logger.info("테스트 시작")
    
    with torch.no_grad():
        for batch in tqdm(test_loader, desc="Testing"):
            # collate_fn에서 반환하는 키 이름들 사용
            features = batch["features"].to(device)  # 이미 결합된 특성
            targets = batch["targets"].to(device)
            masks = batch["masks"].to(device)
            
            # 구조 정보 추출 (collate_fn의 키 이름들과 일치)
            timekey_hrs = batch["timekey_hrs"]
            oper_ids_lists = batch["oper_ids_lists"]
            actual_lengths = batch["actual_lengths"]
            first_oper_ids = batch["first_oper_ids"]
            last_oper_ids = batch["last_oper_ids"]
            
            # 모델 예측
            predictions = model(features, masks)
            loss = criterion(predictions, targets, masks)
            total_loss += loss.item()
            
            # CPU로 변환
            predictions_cpu = predictions.cpu()
            targets_cpu = targets.cpu()
            masks_cpu = masks.cpu()
            
            # 배치 내 각 샘플에 대해 구조화된 결과 생성
            batch_size = predictions_cpu.shape[0]
            for sample_idx in range(batch_size):
                timekey_hr = timekey_hrs[sample_idx]
                oper_ids = oper_ids_lists[sample_idx]
                actual_length = actual_lengths[sample_idx]
                first_oper_id = first_oper_ids[sample_idx]
                last_oper_id = last_oper_ids[sample_idx]
                
                sample_predictions = predictions_cpu[sample_idx]
                sample_targets = targets_cpu[sample_idx]
                sample_masks = masks_cpu[sample_idx]
                
                # 각 시퀀스 위치에 대해 (패딩되지 않은 위치만)
                for seq_idx in range(actual_length):
                    if seq_idx < len(sample_predictions) and not sample_masks[seq_idx]:
                        pred_val = sample_predictions[seq_idx].item()
                        target_val = sample_targets[seq_idx].item()
                        oper_id = oper_ids[seq_idx] if seq_idx < len(oper_ids) else None
                        
                        # 개별 예측 결과 저장
                        structured_predictions.append({
                            'timekey_hr': timekey_hr,
                            'oper_id': oper_id,
                            'seq_position': seq_idx,
                            'prediction': pred_val,
                            'actual': target_val,
                            'first_oper_id': first_oper_id,
                            'last_oper_id': last_oper_id,
                            'window_length': actual_length
                        })
                        
                        all_predictions.append(pred_val)
                        all_targets.append(target_val)
    
    avg_loss = total_loss / len(test_loader)
    
    # 메트릭 계산
    all_predictions = np.array(all_predictions)
    all_targets = np.array(all_targets)
    
    if len(all_predictions) == 0:
        logger.warning("예측 결과가 없습니다!")
        metrics = {"mse": 0.0, "rmse": 0.0, "mae": 0.0, "mape": 0.0, "valid_count": 0}
    else:
        mse = np.mean((all_predictions - all_targets) ** 2)
        rmse = np.sqrt(mse)
        mae = np.mean(np.abs(all_predictions - all_targets))
        
        # MAPE 계산 (0으로 나누기 방지)
        epsilon = 1e-8
        abs_targets = np.abs(all_targets)
        abs_errors = np.abs(all_predictions - all_targets)
        safe_targets = np.maximum(abs_targets, epsilon)
        mape = np.mean(abs_errors / safe_targets * 100)
        
        metrics = {
            "mse": mse,
            "rmse": rmse,
            "mae": mae,
            "mape": mape,
            "valid_count": len(all_predictions)
        }
    
    logger.info(f"테스트 결과: RMSE={metrics['rmse']:.4f}, MAE={metrics['mae']:.4f}, MAPE={metrics['mape']:.2f}%")
    logger.info(f"구조화된 예측 결과: {len(structured_predictions):,}개")
    
    # 모델 정보 수집
    model_info = {
        "total_parameters": sum(p.numel() for p in model.parameters()),
        "trainable_parameters": sum(p.numel() for p in model.parameters() if p.requires_grad),
        "model_type": config.get("model_type", "unknown")
    }
    
    # checkpoint에서 추가 정보 수집 (있는 경우)
    if 'feature_dim' in checkpoint:
        model_info['feature_dim'] = checkpoint['feature_dim']
    if 'epoch' in checkpoint:
        model_info['best_epoch'] = checkpoint['epoch']
    if 'val_loss' in checkpoint:
        model_info['best_val_loss'] = checkpoint['val_loss']
    
    return {
        "test_loss": avg_loss,
        "metrics": metrics,
        "predictions": all_predictions,
        "targets": all_targets,
        "structured_predictions": structured_predictions,
        "model_info": model_info
    }

## 🎯 메인 실행 함수

In [13]:
def main():
    """메인 실행 함수"""
    import argparse

    parser = argparse.ArgumentParser(description="시계열 시퀀스 모델링")
    parser.add_argument("--config-dir", default="configs", help="설정 파일 디렉토리")
    parser.add_argument("--mode", choices=["train", "eval"], default="train", help="실행 모드")
    parser.add_argument("--model-path", default=None, help="평가용 모델 경로")
    parser.add_argument("--gpu", type=int, default=0, help="GPU 번호")
    parser.add_argument("--exp-name", default=None, help="실험명")

    args = parser.parse_args([])

    # 설정 로드
    config = load_config(args.config_dir)
    set_random_seeds(42)

    # 실험명 설정
    if args.exp_name:
        exp_name = args.exp_name
    else:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        model_type = config.get("model_type", "lstm")
        exp_name = f"{model_type}_{timestamp}"

    # 저장 디렉토리
    save_dir = config.get("save_dir", "models")
    os.makedirs(save_dir, exist_ok=True)
    model_save_path = os.path.join(save_dir, f"{exp_name}.pth")

    # 로깅 설정
    logger = setup_logging(os.path.join(save_dir, config.get("log_file", f"{exp_name}.log")))

    # 디바이스 설정
    device = torch.device(f"cuda:{args.gpu}" if torch.cuda.is_available() else "cpu")
    logger.info(f"Using device: {device}")

    # 데이터로더 생성 (통합 임베딩 방식)
    logger.info("데이터 로딩 중...")
    train_loader, val_loader, test_loader, categorical_processor, embedding_layer = create_dataloaders(config)

    # 모델 생성 (새로운 방식)
    logger.info("모델 생성 중...")
    model = create_model_from_config_and_dataloader(config, train_loader)

    if args.mode == "train":
        # 훈련
        logger.info("훈련 시작...")
        train_results = train_model(
            model, train_loader, val_loader, config, device, model_save_path
        )
        
        logger.info("훈련 완료, 테스트 시작...")
        test_results = evaluate_model(
            model, test_loader, device, model_save_path, config
        )
        
    else:
        # 평가
        if not args.model_path:
            raise ValueError("--model-path must be provided in eval mode")
        test_results = evaluate_model(
            model, test_loader, device, args.model_path, config
        )

    # 결과 저장
    results = {
        "exp_name": exp_name,
        "config": config,
        "test_metrics": test_results["metrics"],
        "model_info": test_results.get("model_info", {
            "total_parameters": sum(p.numel() for p in model.parameters()),
            "trainable_parameters": sum(p.numel() for p in model.parameters() if p.requires_grad),
            "model_type": config.get("model_type", "unknown")
        })
    }

    results_path = os.path.join(save_dir, f"{exp_name}_results.json")
    with open(results_path, "w") as f:
        json.dump(results, f, indent=2, default=str)

    logger.info(f"결과 저장 완료: {results_path}")

    # 예측 결과 저장
    if "structured_predictions" in test_results and test_results["structured_predictions"]:
        # 구조화된 예측 결과 저장 (timekey_hr, oper_id 포함)
        structured_df = pd.DataFrame(test_results["structured_predictions"])
        
        # 컬럼명 확인 후 에러 계산 (prediction vs predicted 통일)
        pred_col = "prediction" if "prediction" in structured_df.columns else "predicted"
        
        structured_df["error"] = structured_df[pred_col] - structured_df["actual"]
        structured_df["abs_error"] = structured_df["error"].abs()
        structured_df["abs_percent_error"] = (
            structured_df["abs_error"] / structured_df["actual"].abs().clip(lower=1e-8) * 100
        )
        
        # 구조화된 결과를 메인 예측 파일로 저장
        predictions_path = os.path.join(save_dir, f"{exp_name}_predictions.csv")
        structured_df.to_csv(predictions_path, index=False)
        
        logger.info(f"구조화된 예측 결과 저장:")
        logger.info(f"  - 파일 경로: {predictions_path}")
        logger.info(f"  - 저장된 예측 개수: {len(structured_df):,}개")
        logger.info(f"  - 고유한 timekey_hr: {structured_df['timekey_hr'].nunique()}개")
        
        # oper_id 정보 출력 (있는 경우)
        if 'oper_id' in structured_df.columns:
            logger.info(f"  - 고유한 oper_id: {structured_df['oper_id'].nunique()}개")
        
        # 추가 구조 정보 출력 (있는 경우)
        if 'first_oper_id' in structured_df.columns:
            logger.info(f"  - 고유한 first_oper_id: {structured_df['first_oper_id'].nunique()}개")
        if 'last_oper_id' in structured_df.columns:
            logger.info(f"  - 고유한 last_oper_id: {structured_df['last_oper_id'].nunique()}개")
        if 'window_length' in structured_df.columns:
            avg_window = structured_df['window_length'].mean()
            logger.info(f"  - 평균 윈도우 길이: {avg_window:.1f}")
        
    else:
        # 구조화된 정보가 없는 경우 기본 방식으로 저장 (호환성 유지)
        if "predictions" in test_results and "targets" in test_results:
            predictions_df = pd.DataFrame({
                "actual": test_results["targets"],
                "predicted": test_results["predictions"],
                "residual": test_results["targets"] - test_results["predictions"],
                "abs_error": np.abs(test_results["targets"] - test_results["predictions"]),
                "abs_percent_error": (
                    np.abs(test_results["targets"] - test_results["predictions"]) / 
                    np.maximum(np.abs(test_results["targets"]), 1e-8) * 100
                )
            })
            
            predictions_path = os.path.join(save_dir, f"{exp_name}_predictions.csv")
            predictions_df.to_csv(predictions_path, index=False)
            
            logger.info(f"기본 예측 결과 저장:")
            logger.info(f"  - 파일 경로: {predictions_path}")
            logger.info(f"  - 저장된 예측 개수: {len(predictions_df):,}개")
        else:
            logger.warning("예측 결과 데이터가 없어 CSV 파일을 저장할 수 없습니다.")

    # 최종 결과 요약
    logger.info("=" * 50)
    logger.info(f"실험 완료: {exp_name}")
    logger.info(f"모델 타입: {config.get('model_type')}")
    logger.info(f"테스트 RMSE: {test_results['metrics']['rmse']:.4f}")
    logger.info(f"테스트 MAE: {test_results['metrics']['mae']:.4f}")
    logger.info(f"테스트 MAPE: {test_results['metrics']['mape']:.2f}%")

    # 모델 정보 출력
    model_info = results["model_info"]
    if "total_parameters" in model_info:
        logger.info(f"총 파라미터 수: {model_info['total_parameters']:,}")
    if "trainable_parameters" in model_info:
        logger.info(f"학습 가능한 파라미터 수: {model_info['trainable_parameters']:,}")

    logger.info(f"모델 저장 위치: {model_save_path}")
    logger.info(f"결과 저장 위치: {results_path}")
    logger.info("=" * 50)


if __name__ == "__main__":
    main()

2025-09-04 13:52:30,815 - INFO - Using device: cuda:0
2025-09-04 13:52:30,815 - INFO - 데이터 로딩 중...
2025-09-04 14:07:57,271 - INFO - y값 제거 후: 1671457행
2025-09-04 14:07:57,272 - INFO - 
Inf 값 확인:
2025-09-04 14:07:57,285 - INFO -      → 아주 큰 값(100000.0)으로 대체
2025-09-04 14:07:57,478 - INFO - 원본 데이터 로드 완료:


  ❌ x5: 144개 Inf 값


2025-09-04 14:07:57,479 - INFO -   - 총 행 수: 1,671,457개
2025-09-04 14:07:57,488 - INFO -   - 고유 timekey_hr: 2136개
2025-09-04 14:07:57,488 - INFO - 통합된 범주형 변수 설정:
2025-09-04 14:07:57,488 - INFO -   - 변수별 카테고리 수: {'oper_group': 277, 'days': 7, 'shift': 3, 'x1': 20}
2025-09-04 14:07:57,489 - INFO -   - 총 vocabulary 크기: 307
2025-09-04 14:07:57,600 - INFO -   - oper_group: 인덱스 0~276 (277개)
2025-09-04 14:07:57,693 - INFO -   - days: 인덱스 277~283 (7개)
2025-09-04 14:07:57,783 - INFO -   - shift: 인덱스 284~286 (3개)
2025-09-04 14:07:57,886 - INFO -   - x1: 인덱스 287~306 (20개)
2025-09-04 14:07:57,886 - INFO -   - 최종 통합 vocabulary 크기: 307
2025-09-04 14:07:58,507 - INFO - 날짜 기준 데이터 분할 완료:
2025-09-04 14:07:58,508 - INFO -   - 총 날짜 수: 90일
2025-09-04 14:07:58,508 - INFO -   - Train: 72일 (1,342,808행)
2025-09-04 14:07:58,509 - INFO -   - Validation: 9일 (170,705행)
2025-09-04 14:07:58,509 - INFO -   - Test: 9일 (157,944행)
2025-09-04 14:07:58,510 - INFO -   - Train 날짜 범위: 20250503 ~ 20250713
2025-09-04 14:07:58,5


=== 샘플 2 디버깅 ===
timekey_hr: 2025050307
actual_length: 50
연속형 데이터: torch.Size([50, 20])
  NaN: 0
  Inf: 0


Epoch 1 [Train]:  27%|██▋       | 468/1714 [00:05<00:11, 105.64it/s, Loss=0.0377, MAPE=62.83%]




[A

  범위: [0.0000, 4175.0000]
범주형 데이터: torch.Size([50, 4])
  값 범위: [9, 301]
  고유값: [9, 46, 56, 58, 60, 65, 66, 68, 78, 100, 145, 200, 218, 219, 221, 222, 232, 233, 234, 241, 242, 244, 255, 256, 276, 279, 284, 296, 298, 299, 300, 301]
임베딩 적용 중...
  임베딩 vocab_size: 307


Epoch 1 [Train]:  27%|██▋       | 468/1714 [00:05<00:11, 105.64it/s, Loss=0.0256, MAPE=62.13%]




[A

  임베딩 embed_dim: 8
  임베딩 결과: torch.Size([50, 4, 8])
  임베딩 NaN: 0
  임베딩 Inf: 0
  Flatten 후: torch.Size([50, 32])
  Flatten NaN: 0
최종 결합: torch.Size([50, 52])
  결합 NaN: 0
  결합 Inf: 0
  결합 범위: [-3.8325, 4175.0000]
타겟 데이터: torch.Size([50])
  타겟 NaN: 0





  타겟 범위: [0.0060, 0.0380]


Epoch 1 [Train]:  27%|██▋       | 468/1714 [00:05<00:11, 105.64it/s, Loss=0.0191, MAPE=71.60%][A


=== 샘플 1 디버깅 ===
timekey_hr: 2025050307
actual_length: 50
연속형 데이터: torch.Size([50, 20])
  NaN: 0
  Inf: 0



Epoch 1 [Train]:  75%|███████▍  | 1282/1714 [00:13<00:04, 103.81it/s, Loss=0.0315, MAPE=60.68%]

  범위: [0.0000, 4175.0000]

[A


범주형 데이터: torch.Size([50, 4])
  값 범위: [3, 296]
  고유값: [3, 4, 5, 9, 11, 12, 13, 19, 24, 25, 56, 67, 78, 89, 100, 145, 200, 221, 222, 233, 238, 241, 244, 256, 262, 268, 271, 276, 279, 284, 293, 294, 295, 296]
임베딩 적용 중...
  임베딩 vocab_size: 307
  임베딩 embed_dim: 8
  임베딩 결과: torch.Size([50, 4, 8])
  임베딩 NaN: 0
  임베딩 Inf: 0
  Flatten 후: torch.Size([50, 32])  Flatten NaN: 0






최종 결합: torch.Size([50, 52])

Epoch 1 [Train]:  75%|███████▍  | 1282/1714 [00:13<00:04, 103.81it/s, Loss=0.0183, MAPE=59.88%][A


  결합 NaN: 0







Epoch 1 [Train]:  75%|███████▌  | 1293/1714 [00:13<00:04, 100.78it/s, Loss=0.0183, MAPE=59.88%]

  결합 Inf: 0

[A


  결합 범위: [-2.5095, 4175.0000]
타겟 데이터: torch.Size([50])
  타겟 NaN: 0
  타겟 범위: [0.0030, 0.0400]





=== 샘플 0 디버깅 ===
timekey_hr: 2025050307
actual_length: 50
연속형 데이터: torch.Size([50, 20])
  NaN: 0





  Inf: 0

Epoch 1 [Train]:  85%|████████▌ | 1458/1714 [00:15<00:02, 104.94it/s, Loss=0.0145, MAPE=63.11%]


  범위: [0.0000, 4175.0000]

[A


범주형 데이터: torch.Size([50, 4])
  값 범위: [1, 293]
  고유값: [1, 34, 56, 67, 78, 89, 100, 112, 200, 204, 214, 218, 219, 221, 222, 223, 224, 229, 241, 244, 245, 249, 255, 256, 279, 284, 287, 288, 289, 290, 291, 292, 293]
임베딩 적용 중...
  임베딩 vocab_size: 307
  임베딩 embed_dim: 8
  임베딩 결과: torch.Size([50, 4, 8])
  임베딩 NaN: 0  임베딩 Inf: 0

  Flatten 후: torch.Size([50, 32])
  Flatten NaN: 0


Epoch 1 [Train]:  85%|████████▌ | 1458/1714 [00:15<00:02, 104.94it/s, Loss=0.0156, MAPE=65.41%]




[A

최종 결합: torch.Size([50, 52])
  결합 NaN: 0
  결합 Inf: 0
  결합 범위: [-2.9151, 4175.0000]
타겟 데이터: torch.Size([50])
  타겟 NaN: 0





  타겟 범위: [0.0030, 0.0400]


Epoch 1 [Train]:  85%|████████▌ | 1458/1714 [00:15<00:02, 104.94it/s, Loss=0.0144, MAPE=54.38%][A2025-09-04 14:09:01,401 - INFO - Epoch   1: Train Loss=0.0267, Train MAPE=60036.54% (검증 생략)
Training Progress:  10%|█         | 1/10 [00:18<02:45, 18.38s/it, T_Loss=0.0267, V_Loss=inf(이전값), V_MAPE=0.00%, Best=inf, Patience=0/20]


=== 샘플 0 디버깅 ===
timekey_hr: 2025050307
actual_length: 50
연속형 데이터: torch.Size([50, 20])



Epoch 2 [Train]:  21%|██        | 364/1714 [00:03<00:13, 101.13it/s, Loss=0.0321, MAPE=131.03%]

  NaN: 0

[A


  Inf: 0
  범위: [0.0000, 4175.0000]
범주형 데이터: torch.Size([50, 4])
  값 범위: [1, 293]
  고유값: [1, 34, 56, 67, 78, 89, 100, 112, 200, 204, 214, 218, 219, 221, 222, 223, 224, 229, 241, 244, 245, 249, 255, 256, 279, 284, 287, 288, 289, 290, 291, 292, 293]
임베딩 적용 중...
  임베딩 vocab_size: 307
  임베딩 embed_dim: 8
  임베딩 결과: torch.Size([50, 4, 8])
  임베딩 NaN: 0
  임베딩 Inf: 0







Epoch 2 [Train]:  21%|██        | 364/1714 [00:03<00:13, 101.13it/s, Loss=0.0110, MAPE=81.75%] 

  Flatten 후: torch.Size([50, 32])

[A


  Flatten NaN: 0
최종 결합: torch.Size([50, 52])
  결합 NaN: 0
  결합 Inf: 0
  결합 범위: [-2.9151, 4175.0000]
타겟 데이터: torch.Size([50])
  타겟 NaN: 0
  타겟 범위: [0.0030, 0.0400]





=== 샘플 1 디버깅 ===
timekey_hr: 2025050307
actual_length: 50
연속형 데이터: torch.Size([50, 20])
  NaN: 0
  Inf: 0
  범위: [0.0000, 4175.0000]


Epoch 2 [Train]:  78%|███████▊  | 1344/1714 [00:13<00:03, 103.81it/s, Loss=0.0235, MAPE=54.70%]


범주형 데이터: torch.Size([50, 4])

[A


  값 범위: [3, 296]
  고유값: [3, 4, 5, 9, 11, 12, 13, 19, 24, 25, 56, 67, 78, 89, 100, 145, 200, 221, 222, 233, 238, 241, 244, 256, 262, 268, 271, 276, 279, 284, 293, 294, 295, 296]
임베딩 적용 중...
  임베딩 vocab_size: 307
  임베딩 embed_dim: 8
  임베딩 결과: torch.Size([50, 4, 8])
  임베딩 NaN: 0
  임베딩 Inf: 0
  Flatten 후: torch.Size([50, 32])
  Flatten NaN: 0최종 결합: torch.Size([50, 52])

  결합 NaN: 0





  결합 Inf: 0

Epoch 2 [Train]:  78%|███████▊  | 1344/1714 [00:13<00:03, 103.81it/s, Loss=0.0362, MAPE=52.00%]




[A

  결합 범위: [-2.5095, 4175.0000]
타겟 데이터: torch.Size([50])
  타겟 NaN: 0
  타겟 범위: [0.0030, 0.0400]





=== 샘플 2 디버깅 ===
timekey_hr: 2025050307
actual_length: 50
연속형 데이터: torch.Size([50, 20])


Epoch 2 [Train]:  97%|█████████▋| 1661/1714 [00:16<00:00, 104.06it/s, Loss=0.0158, MAPE=50.23%]




[A

  NaN: 0
  Inf: 0
  범위: [0.0000, 4175.0000]
범주형 데이터: torch.Size([50, 4])
  값 범위: [9, 301]
  고유값: [9, 46, 56, 58, 60, 65, 66, 68, 78, 100, 145, 200, 218, 219, 221, 222, 232, 233, 234, 241, 242, 244, 255, 256, 276, 279, 284, 296, 298, 299, 300, 301]
임베딩 적용 중...
  임베딩 vocab_size: 307
  임베딩 embed_dim: 8
  임베딩 결과: torch.Size([50, 4, 8])

  임베딩 NaN: 0  임베딩 Inf: 0





  Flatten 후: torch.Size([50, 32])


Epoch 2 [Train]:  97%|█████████▋| 1661/1714 [00:16<00:00, 104.06it/s, Loss=0.0091, MAPE=49.57%]

  Flatten NaN: 0


[A

최종 결합: torch.Size([50, 52])
  결합 NaN: 0
  결합 Inf: 0



Epoch 2 [Train]:  97%|█████████▋| 1661/1714 [00:16<00:00, 104.06it/s, Loss=0.0412, MAPE=55.35%]

  결합 범위: [-3.8325, 4175.0000]

[A


타겟 데이터: torch.Size([50])
  타겟 NaN: 0
  타겟 범위: [0.0060, 0.0380]





=== 샘플 0 디버깅 ===
timekey_hr: 2025071400
actual_length: 50
연속형 데이터: torch.Size([50, 20])
  NaN: 0
  Inf: 0
  범위: [0.0000, 5749.0000]
범주형 데이터: torch.Size([50, 4])
  값 범위: [0, 289]
  고유값: [0, 1, 12, 23, 34, 45, 56, 67, 78, 89, 100, 111, 112, 134, 156, 167, 178, 189, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 222, 233, 244, 255, 266, 280, 286, 287, 288, 289]
임베딩 적용 중...
  임베딩 vocab_size: 307
  임베딩 embed_dim: 8
  임베딩 결과: torch.Size([50, 4, 8])
  임베딩 NaN: 0
  임베딩 Inf: 0
  Flatten 후: torch.Size([50, 32])
  Flatten NaN: 0
최종 결합: torch.Size([50, 52])
  결합 NaN: 0
  결합 Inf: 0
  결합 범위: [-2.7773, 5749.0000]
타겟 데이터: torch.Size([50])
  타겟 NaN: 0
  타겟 범위: [0.0150, 1.0080]

=== 샘플 1 디버깅 ===
timekey_hr: 2025071400
actual_length: 50
연속형 데이터: torch.Size([50, 20])
  NaN: 0
  Inf: 0
  범위: [0.0000, 4450.0000]
범주형 데이터: torch.Size([50, 4])
  값 범위: [1, 291]
  고유값: [1, 12, 23, 34, 45, 56, 67, 78, 89, 145, 200, 208, 212, 213, 215, 216, 217, 218, 219, 221, 222, 223, 224, 225, 226, 227, 

2025-09-04 14:09:20,625 - INFO - Epoch   2: Train Loss=0.0248, Val Loss=0.0205, Train MAPE=57499.59%, Val MAPE=16735.68%
Training Progress:  10%|█         | 1/10 [00:37<02:45, 18.38s/it, T_Loss=0.0248, V_Loss=0.0205(검증됨), V_MAPE=16735.68%, Best=inf, Patience=0/20]2025-09-04 14:09:20,640 - INFO -   → Best model saved! (Val Loss: 0.0205)
Training Progress:  20%|██        | 2/10 [00:37<02:31, 18.89s/it, T_Loss=0.0248, V_Loss=0.0205(검증됨), V_MAPE=16735.68%, Best=inf, Patience=0/20]


=== 샘플 1 디버깅 ===
timekey_hr: 2025050307
actual_length: 50





연속형 데이터: torch.Size([50, 20])

Epoch 3 [Train]:  51%|█████     | 867/1714 [00:08<00:08, 101.54it/s, Loss=0.0290, MAPE=58.23%]




[A

  NaN: 0
  Inf: 0
  범위: [0.0000, 4175.0000]
범주형 데이터: torch.Size([50, 4])
  값 범위: [3, 296]
  고유값: [3, 4, 5, 9, 11, 12, 13, 19, 24, 25, 56, 67, 78, 89, 100, 145, 200, 221, 222, 233, 238, 241, 244, 256, 262, 268, 271, 276, 279, 284, 293, 294, 295, 296]
임베딩 적용 중...
  임베딩 vocab_size: 307
  임베딩 embed_dim: 8
  임베딩 결과: torch.Size([50, 4, 8])
  임베딩 NaN: 0





  임베딩 Inf: 0

Epoch 3 [Train]:  51%|█████     | 867/1714 [00:08<00:08, 101.54it/s, Loss=0.0189, MAPE=58.40%]


  Flatten 후: torch.Size([50, 32])

[A


  Flatten NaN: 0
최종 결합: torch.Size([50, 52])
  결합 NaN: 0
  결합 Inf: 0
  결합 범위: [-2.5095, 4175.0000]
타겟 데이터: torch.Size([50])
  타겟 NaN: 0
  타겟 범위: [0.0030, 0.0400]





=== 샘플 2 디버깅 ===
timekey_hr: 2025050307
actual_length: 50
연속형 데이터: torch.Size([50, 20])
  NaN: 0
  Inf: 0


Epoch 3 [Train]:  56%|█████▋    | 966/1714 [00:09<00:07, 100.86it/s, Loss=0.0213, MAPE=64.06%]




[A

  범위: [0.0000, 4175.0000]
범주형 데이터: torch.Size([50, 4])
  값 범위: [9, 301]
  고유값: [9, 46, 56, 58, 60, 65, 66, 68, 78, 100, 145, 200, 218, 219, 221, 222, 232, 233, 234, 241, 242, 244, 255, 256, 276, 279, 284, 296, 298, 299, 300, 301]
임베딩 적용 중...
  임베딩 vocab_size: 307






  임베딩 embed_dim: 8  임베딩 결과: torch.Size([50, 4, 8])


Epoch 3 [Train]:  56%|█████▋    | 966/1714 [00:09<00:07, 100.86it/s, Loss=0.0193, MAPE=71.04%]

  임베딩 NaN: 0
  임베딩 Inf: 0

[A


  Flatten 후: torch.Size([50, 32])
  Flatten NaN: 0
최종 결합: torch.Size([50, 52])
  결합 NaN: 0
  결합 Inf: 0
  결합 범위: [-3.8325, 4175.0000]
타겟 데이터: torch.Size([50])
  타겟 NaN: 0
  타겟 범위: [0.0060, 0.0380]





=== 샘플 0 디버깅 ===
timekey_hr: 2025050307
actual_length: 50
연속형 데이터: torch.Size([50, 20])



Epoch 3 [Train]:  80%|████████  | 1372/1714 [00:13<00:03, 103.02it/s, Loss=0.0104, MAPE=55.11%]

  NaN: 0

[A


  Inf: 0
  범위: [0.0000, 4175.0000]
범주형 데이터: torch.Size([50, 4])
  값 범위: [1, 293]
  고유값: [1, 34, 56, 67, 78, 89, 100, 112, 200, 204, 214, 218, 219, 221, 222, 223, 224, 229, 241, 244, 245, 249, 255, 256, 279, 284, 287, 288, 289, 290, 291, 292, 293]
임베딩 적용 중...


Epoch 3 [Train]:  80%|████████  | 1372/1714 [00:13<00:03, 103.02it/s, Loss=0.0099, MAPE=61.52%]




[A

  임베딩 vocab_size: 307
  임베딩 embed_dim: 8
  임베딩 결과: torch.Size([50, 4, 8])
  임베딩 NaN: 0
  임베딩 Inf: 0
  Flatten 후: torch.Size([50, 32])
  Flatten NaN: 0
최종 결합: torch.Size([50, 52])
  결합 NaN: 0
  결합 Inf: 0
  결합 범위: [-2.9151, 4175.0000]

타겟 데이터: torch.Size([50])




  타겟 NaN: 0

Epoch 3 [Train]:  80%|████████  | 1372/1714 [00:13<00:03, 103.02it/s, Loss=0.0395, MAPE=52.20%][A


  타겟 범위: [0.0030, 0.0400]


2025-09-04 14:09:37,794 - INFO - Epoch   3: Train Loss=0.0247, Train MAPE=56787.93% (검증 생략)
Training Progress:  30%|███       | 3/10 [00:54<02:06, 18.10s/it, T_Loss=0.0247, V_Loss=0.0205(이전값), V_MAPE=16735.68%, Best=0.0205, Patience=0/20]


=== 샘플 1 디버깅 ===
timekey_hr: 2025050307
actual_length: 50
연속형 데이터: torch.Size([50, 20])
  NaN: 0





  Inf: 0

Epoch 4 [Train]:   6%|▌         | 95/1714 [00:00<00:16, 100.47it/s, Loss=0.0208, MAPE=59.53%][A


  범위: [0.0000, 4175.0000]
범주형 데이터: torch.Size([50, 4])
  값 범위: [3, 296]
  고유값: [3, 4, 5, 9, 11, 12, 13, 19, 24, 25, 56, 67, 78, 89, 100, 145, 200, 221, 222, 233, 238, 241, 244, 256, 262, 268, 271, 276, 279, 284, 293, 294, 295, 296]
임베딩 적용 중...
  임베딩 vocab_size: 307
  임베딩 embed_dim: 8
  임베딩 결과: torch.Size([50, 4, 8])
  임베딩 NaN: 0
  임베딩 Inf: 0
  Flatten 후: torch.Size([50, 32])  Flatten NaN: 0








Epoch 4 [Train]:   6%|▌         | 95/1714 [00:01<00:16, 100.47it/s, Loss=0.0197, MAPE=64.22%]

최종 결합: torch.Size([50, 52])
  결합 NaN: 0

[A


  결합 Inf: 0
  결합 범위: [-2.5095, 4175.0000]
타겟 데이터: torch.Size([50])
  타겟 NaN: 0
  타겟 범위: [0.0030, 0.0400]





=== 샘플 2 디버깅 ===
timekey_hr: 2025050307
actual_length: 50
연속형 데이터: torch.Size([50, 20])
  NaN: 0





  Inf: 0

Epoch 4 [Train]:  72%|███████▏  | 1230/1714 [00:12<00:04, 103.92it/s, Loss=0.0120, MAPE=63.70%]




[A

  범위: [0.0000, 4175.0000]
범주형 데이터: torch.Size([50, 4])
  값 범위: [9, 301]
  고유값: [9, 46, 56, 58, 60, 65, 66, 68, 78, 100, 145, 200, 218, 219, 221, 222, 232, 233, 234, 241, 242, 244, 255, 256, 276, 279, 284, 296, 298, 299, 300, 301]
임베딩 적용 중...
  임베딩 vocab_size: 307
  임베딩 embed_dim: 8
  임베딩 결과: torch.Size([50, 4, 8])
  임베딩 NaN: 0
  임베딩 Inf: 0
  Flatten 후: torch.Size([50, 32])
  Flatten NaN: 0





최종 결합: torch.Size([50, 52])


Epoch 4 [Train]:  72%|███████▏  | 1230/1714 [00:12<00:04, 103.92it/s, Loss=0.0165, MAPE=52.84%]

  결합 NaN: 0


[A

  결합 Inf: 0
  결합 범위: [-3.8325, 4175.0000]
타겟 데이터: torch.Size([50])
  타겟 NaN: 0
  타겟 범위: [0.0060, 0.0380]





=== 샘플 0 디버깅 ===
timekey_hr: 2025050307
actual_length: 50
연속형 데이터: torch.Size([50, 20])
  NaN: 0



Epoch 4 [Train]:  87%|████████▋ | 1494/1714 [00:14<00:02, 102.92it/s, Loss=0.0087, MAPE=55.82%]

  Inf: 0

[A


  범위: [0.0000, 4175.0000]
범주형 데이터: torch.Size([50, 4])
  값 범위: [1, 293]
  고유값: [1, 34, 56, 67, 78, 89, 100, 112, 200, 204, 214, 218, 219, 221, 222, 223, 224, 229, 241, 244, 245, 249, 255, 256, 279, 284, 287, 288, 289, 290, 291, 292, 293]
임베딩 적용 중...
  임베딩 vocab_size: 307
  임베딩 embed_dim: 8

  임베딩 NaN: 0
  임베딩 결과: torch.Size([50, 4, 8])




  임베딩 Inf: 0
  Flatten 후: torch.Size([50, 32])

Epoch 4 [Train]:  87%|████████▋ | 1494/1714 [00:14<00:02, 102.92it/s, Loss=0.1138, MAPE=53.33%][A


  Flatten NaN: 0
최종 결합: torch.Size([50, 52])
  결합 NaN: 0
  결합 Inf: 0
  결합 범위: [-2.9151, 4175.0000]
타겟 데이터: torch.Size([50])
  타겟 NaN: 0
  타겟 범위: [0.0030, 0.0400]





=== 샘플 0 디버깅 ===
timekey_hr: 2025071400
actual_length: 50
연속형 데이터: torch.Size([50, 20])
  NaN: 0
  Inf: 0
  범위: [0.0000, 5749.0000]
범주형 데이터: torch.Size([50, 4])
  값 범위: [0, 289]
  고유값: [0, 1, 12, 23, 34, 45, 56, 67, 78, 89, 100, 111, 112, 134, 156, 167, 178, 189, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 222, 233, 244, 255, 266, 280, 286, 287, 288, 289]
임베딩 적용 중...
  임베딩 vocab_size: 307
  임베딩 embed_dim: 8
  임베딩 결과: torch.Size([50, 4, 8])
  임베딩 NaN: 0
  임베딩 Inf: 0
  Flatten 후: torch.Size([50, 32])
  Flatten NaN: 0
최종 결합: torch.Size([50, 52])
  결합 NaN: 0
  결합 Inf: 0

  결합 범위: [-2.7773, 5749.0000]타겟 데이터: torch.Size([50])
  타겟 NaN: 0
  타겟 범위: [0.0150, 1.0080]

=== 샘플 1 디버깅 ===
timekey_hr: 2025071400
actual_length: 50
연속형 데이터: torch.Size([50, 20])
  NaN: 0
  Inf: 0
  범위: [0.0000, 4450.0000]
범주형 데이터: torch.Size([50, 4])
  값 범위: [1, 291]
  고유값: [1, 12, 23, 34, 45, 56, 67, 78, 89, 145, 200, 208, 212, 213, 215, 216, 217, 218, 219, 221, 222, 223, 224, 225, 226, 227, 

2025-09-04 14:09:56,790 - INFO - Epoch   4: Train Loss=0.0244, Val Loss=0.0202, Train MAPE=61718.76%, Val MAPE=20951.88%
Training Progress:  30%|███       | 3/10 [01:13<02:06, 18.10s/it, T_Loss=0.0244, V_Loss=0.0202(검증됨), V_MAPE=20951.88%, Best=0.0205, Patience=0/20]2025-09-04 14:09:56,816 - INFO -   → Best model saved! (Val Loss: 0.0202)
Training Progress:  40%|████      | 4/10 [01:13<01:50, 18.46s/it, T_Loss=0.0244, V_Loss=0.0202(검증됨), V_MAPE=20951.88%, Best=0.0205, Patience=0/20]


=== 샘플 1 디버깅 ===
timekey_hr: 2025050307
actual_length: 50
연속형 데이터: torch.Size([50, 20])
  NaN: 0
  Inf: 0





  범위: [0.0000, 4175.0000]

Epoch 5 [Train]:  26%|██▋       | 450/1714 [00:04<00:12, 103.57it/s, Loss=0.1339, MAPE=75.13%][A


범주형 데이터: torch.Size([50, 4])
  값 범위: [3, 296]
  고유값: [3, 4, 5, 9, 11, 12, 13, 19, 24, 25, 56, 67, 78, 89, 100, 145, 200, 221, 222, 233, 238, 241, 244, 256, 262, 268, 271, 276, 279, 284, 293, 294, 295, 296]
임베딩 적용 중...
  임베딩 vocab_size: 307
  임베딩 embed_dim: 8

  임베딩 결과: torch.Size([50, 4, 8])  임베딩 NaN: 0  임베딩 Inf: 0

  Flatten 후: torch.Size([50, 32])
  Flatten NaN: 0
최종 결합: torch.Size([50, 52])





  결합 NaN: 0

Epoch 5 [Train]:  26%|██▋       | 450/1714 [00:04<00:12, 103.57it/s, Loss=0.0182, MAPE=77.79%]




[A

  결합 Inf: 0





  결합 범위: [-2.5095, 4175.0000]

Epoch 5 [Train]:  27%|██▋       | 461/1714 [00:04<00:12, 97.85it/s, Loss=0.0182, MAPE=77.79%] [A


타겟 데이터: torch.Size([50])
  타겟 NaN: 0
  타겟 범위: [0.0030, 0.0400]





=== 샘플 2 디버깅 ===
timekey_hr: 2025050307
actual_length: 50







Epoch 5 [Train]:  43%|████▎     | 731/1714 [00:07<00:09, 102.44it/s, Loss=0.0164, MAPE=55.96%][A

연속형 데이터: torch.Size([50, 20])





  NaN: 0

Epoch 5 [Train]:  43%|████▎     | 742/1714 [00:07<00:09, 102.68it/s, Loss=0.0164, MAPE=55.96%]




[A

  Inf: 0
  범위: [0.0000, 4175.0000]
범주형 데이터: torch.Size([50, 4])
  값 범위: [9, 301]
  고유값: [9, 46, 56, 58, 60, 65, 66, 68, 78, 100, 145, 200, 218, 219, 221, 222, 232, 233, 234, 241, 242, 244, 255, 256, 276, 279, 284, 296, 298, 299, 300, 301]
임베딩 적용 중...
  임베딩 vocab_size: 307
  임베딩 embed_dim: 8
  임베딩 결과: torch.Size([50, 4, 8])
  임베딩 NaN: 0
  임베딩 Inf: 0
  Flatten 후: torch.Size([50, 32])





  Flatten NaN: 0

Epoch 5 [Train]:  43%|████▎     | 742/1714 [00:07<00:09, 102.68it/s, Loss=0.0183, MAPE=61.68%]




[A

최종 결합: torch.Size([50, 52])
  결합 NaN: 0
  결합 Inf: 0
  결합 범위: [-3.8325, 4175.0000]
타겟 데이터: torch.Size([50])
  타겟 NaN: 0
  타겟 범위: [0.0060, 0.0380]





=== 샘플 0 디버깅 ===
timekey_hr: 2025050307
actual_length: 50
연속형 데이터: torch.Size([50, 20])
  NaN: 0







Epoch 5 [Train]:  48%|████▊     | 818/1714 [00:08<00:08, 102.54it/s, Loss=0.0137, MAPE=62.32%][A

  Inf: 0
  범위: [0.0000, 4175.0000]
범주형 데이터: torch.Size([50, 4])
  값 범위: [1, 293]
  고유값: [1, 34, 56, 67, 78, 89, 100, 112, 200, 204, 214, 218, 219, 221, 222, 223, 224, 229, 241, 244, 245, 249, 255, 256, 279, 284, 287, 288, 289, 290, 291, 292, 293]
임베딩 적용 중...
  임베딩 vocab_size: 307
  임베딩 embed_dim: 8
  임베딩 결과: torch.Size([50, 4, 8])
  임베딩 NaN: 0





  임베딩 Inf: 0
  Flatten 후: torch.Size([50, 32])


Epoch 5 [Train]:  48%|████▊     | 818/1714 [00:08<00:08, 102.54it/s, Loss=0.0151, MAPE=55.84%]

  Flatten NaN: 0
최종 결합: torch.Size([50, 52])


[A

  결합 NaN: 0
  결합 Inf: 0
  결합 범위: [-2.9151, 4175.0000]
타겟 데이터: torch.Size([50])
  타겟 NaN: 0
  타겟 범위: [0.0030, 0.0400]


2025-09-04 14:10:14,087 - INFO - Epoch   5: Train Loss=0.0243, Train MAPE=66567.06% (검증 생략)
Training Progress:  50%|█████     | 5/10 [01:31<01:30, 18.03s/it, T_Loss=0.0243, V_Loss=0.0202(이전값), V_MAPE=20951.88%, Best=0.0202, Patience=0/20]


=== 샘플 0 디버깅 ===
timekey_hr: 2025050307
actual_length: 50
연속형 데이터: torch.Size([50, 20])
  NaN: 0
  Inf: 0


Epoch 6 [Train]:  48%|████▊     | 831/1714 [00:08<00:08, 101.70it/s, Loss=0.0503, MAPE=61.21%]




[A

  범위: [0.0000, 4175.0000]
범주형 데이터: torch.Size([50, 4])
  값 범위: [1, 293]
  고유값: [1, 34, 56, 67, 78, 89, 100, 112, 200, 204, 214, 218, 219, 221, 222, 223, 224, 229, 241, 244, 245, 249, 255, 256, 279, 284, 287, 288, 289, 290, 291, 292, 293]
임베딩 적용 중...
  임베딩 vocab_size: 307
  임베딩 embed_dim: 8
  임베딩 결과: torch.Size([50, 4, 8])
  임베딩 NaN: 0
  임베딩 Inf: 0
  Flatten 후: torch.Size([50, 32])





  Flatten NaN: 0
최종 결합: torch.Size([50, 52])

Epoch 6 [Train]:  48%|████▊     | 831/1714 [00:08<00:08, 101.70it/s, Loss=0.0126, MAPE=69.67%]


  결합 NaN: 0


[A

  결합 Inf: 0
  결합 범위: [-2.9151, 4175.0000]
타겟 데이터: torch.Size([50])
  타겟 NaN: 0
  타겟 범위: [0.0030, 0.0400]





=== 샘플 2 디버깅 ===
timekey_hr: 2025050307
actual_length: 50
연속형 데이터: torch.Size([50, 20])





  NaN: 0

Epoch 6 [Train]:  70%|███████   | 1205/1714 [00:11<00:04, 102.05it/s, Loss=0.0154, MAPE=67.85%]




[A

  Inf: 0
  범위: [0.0000, 4175.0000]
범주형 데이터: torch.Size([50, 4])
  값 범위: [9, 301]
  고유값: [9, 46, 56, 58, 60, 65, 66, 68, 78, 100, 145, 200, 218, 219, 221, 222, 232, 233, 234, 241, 242, 244, 255, 256, 276, 279, 284, 296, 298, 299, 300, 301]
임베딩 적용 중...
  임베딩 vocab_size: 307  임베딩 embed_dim: 8







Epoch 6 [Train]:  70%|███████   | 1205/1714 [00:11<00:04, 102.05it/s, Loss=0.0130, MAPE=64.44%]


  임베딩 결과: torch.Size([50, 4, 8])
  임베딩 NaN: 0
  임베딩 Inf: 0

[A


  Flatten 후: torch.Size([50, 32])
  Flatten NaN: 0
최종 결합: torch.Size([50, 52])
  결합 NaN: 0
  결합 Inf: 0
  결합 범위: [-3.8325, 4175.0000]
타겟 데이터: torch.Size([50])
  타겟 NaN: 0
  타겟 범위: [0.0060, 0.0380]





=== 샘플 1 디버깅 ===
timekey_hr: 2025050307
actual_length: 50
연속형 데이터: torch.Size([50, 20])
  NaN: 0
  Inf: 0
  범위: [0.0000, 4175.0000]







Epoch 6 [Train]:  97%|█████████▋| 1656/1714 [00:16<00:00, 101.68it/s, Loss=0.0077, MAPE=53.02%]

범주형 데이터: torch.Size([50, 4])


[A

  값 범위: [3, 296]


Epoch 6 [Train]:  97%|█████████▋| 1667/1714 [00:16<00:00, 100.70it/s, Loss=0.0077, MAPE=53.02%]




[A

  고유값: [3, 4, 5, 9, 11, 12, 13, 19, 24, 25, 56, 67, 78, 89, 100, 145, 200, 221, 222, 233, 238, 241, 244, 256, 262, 268, 271, 276, 279, 284, 293, 294, 295, 296]
임베딩 적용 중...
  임베딩 vocab_size: 307
  임베딩 embed_dim: 8
  임베딩 결과: torch.Size([50, 4, 8])
  임베딩 NaN: 0
  임베딩 Inf: 0
  Flatten 후: torch.Size([50, 32])
  Flatten NaN: 0
최종 결합: torch.Size([50, 52])
  결합 NaN: 0
  결합 Inf: 0
  결합 범위: [-2.5095, 4175.0000]
타겟 데이터: torch.Size([50])





  타겟 NaN: 0

Epoch 6 [Train]:  97%|█████████▋| 1667/1714 [00:16<00:00, 100.70it/s, Loss=0.0335, MAPE=52.16%][A


  타겟 범위: [0.0030, 0.0400]





=== 샘플 0 디버깅 ===
timekey_hr: 2025071400
actual_length: 50
연속형 데이터: torch.Size([50, 20])
  NaN: 0
  Inf: 0
  범위: [0.0000, 5749.0000]
범주형 데이터: torch.Size([50, 4])
  값 범위: [0, 289]
  고유값: [0, 1, 12, 23, 34, 45, 56, 67, 78, 89, 100, 111, 112, 134, 156, 167, 178, 189, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 222, 233, 244, 255, 266, 280, 286, 287, 288, 289]
임베딩 적용 중...
  임베딩 vocab_size: 307
  임베딩 embed_dim: 8
  임베딩 결과: torch.Size([50, 4, 8])
  임베딩 NaN: 0
  임베딩 Inf: 0
  Flatten 후: torch.Size([50, 32])
  Flatten NaN: 0
최종 결합: torch.Size([50, 52])
  결합 NaN: 0
  결합 Inf: 0
  결합 범위: [-2.7773, 5749.0000]
타겟 데이터: torch.Size([50])
  타겟 NaN: 0
  타겟 범위: [0.0150, 1.0080]

=== 샘플 1 디버깅 ===
timekey_hr: 2025071400
actual_length: 50
연속형 데이터: torch.Size([50, 20])
  NaN: 0
  Inf: 0
  범위: [0.0000, 4450.0000]

범주형 데이터: torch.Size([50, 4])  값 범위: [1, 291]
  고유값: [1, 12, 23, 34, 45, 56, 67, 78, 89, 145, 200, 208, 212, 213, 215, 216, 217, 218, 219, 221, 222, 223, 224, 225, 226, 227, 

2025-09-04 14:10:33,191 - INFO - Epoch   6: Train Loss=0.0242, Val Loss=0.0202, Train MAPE=66663.10%, Val MAPE=17262.90%
Training Progress:  50%|█████     | 5/10 [01:50<01:30, 18.03s/it, T_Loss=0.0242, V_Loss=0.0202(검증됨), V_MAPE=17262.90%, Best=0.0202, Patience=0/20]2025-09-04 14:10:33,219 - INFO -   → Best model saved! (Val Loss: 0.0202)
Training Progress:  60%|██████    | 6/10 [01:50<01:13, 18.41s/it, T_Loss=0.0242, V_Loss=0.0202(검증됨), V_MAPE=17262.90%, Best=0.0202, Patience=0/20]


=== 샘플 1 디버깅 ===
timekey_hr: 2025050307
actual_length: 50
연속형 데이터: torch.Size([50, 20])
  NaN: 0





  Inf: 0

Epoch 7 [Train]:  36%|███▌      | 611/1714 [00:06<00:10, 101.82it/s, Loss=0.0169, MAPE=58.79%]




[A

  범위: [0.0000, 4175.0000]
범주형 데이터: torch.Size([50, 4])
  값 범위: [3, 296]
  고유값: [3, 4, 5, 9, 11, 12, 13, 19, 24, 25, 56, 67, 78, 89, 100, 145, 200, 221, 222, 233, 238, 241, 244, 256, 262, 268, 271, 276, 279, 284, 293, 294, 295, 296]
임베딩 적용 중...
  임베딩 vocab_size: 307
  임베딩 embed_dim: 8
  임베딩 결과: torch.Size([50, 4, 8])
  임베딩 NaN: 0
  임베딩 Inf: 0
  Flatten 후: torch.Size([50, 32])  Flatten NaN: 0






최종 결합: torch.Size([50, 52])

Epoch 7 [Train]:  36%|███▌      | 611/1714 [00:06<00:10, 101.82it/s, Loss=0.0460, MAPE=114.58%]




[A

  결합 NaN: 0
  결합 Inf: 0
  결합 범위: [-2.5095, 4175.0000]
타겟 데이터: torch.Size([50])
  타겟 NaN: 0
  타겟 범위: [0.0030, 0.0400]





=== 샘플 0 디버깅 ===
timekey_hr: 2025050307
actual_length: 50
연속형 데이터: torch.Size([50, 20])
  NaN: 0
  Inf: 0
  범위: [0.0000, 4175.0000]
범주형 데이터: torch.Size([50, 4])



Epoch 7 [Train]:  79%|███████▉  | 1356/1714 [00:13<00:03, 101.52it/s, Loss=0.0218, MAPE=66.65%]

  값 범위: [1, 293]

[A


  고유값: [1, 34, 56, 67, 78, 89, 100, 112, 200, 204, 214, 218, 219, 221, 222, 223, 224, 229, 241, 244, 245, 249, 255, 256, 279, 284, 287, 288, 289, 290, 291, 292, 293]
임베딩 적용 중...
  임베딩 vocab_size: 307
  임베딩 embed_dim: 8
  임베딩 결과: torch.Size([50, 4, 8])
  임베딩 NaN: 0
  임베딩 Inf: 0
  Flatten 후: torch.Size([50, 32])





  Flatten NaN: 0


Epoch 7 [Train]:  79%|███████▉  | 1356/1714 [00:13<00:03, 101.52it/s, Loss=0.0767, MAPE=59.25%][A


  결합 Inf: 0최종 결합: torch.Size([50, 52])  결합 NaN: 0

  결합 범위: [-2.9151, 4175.0000]
타겟 데이터: torch.Size([50])
  타겟 NaN: 0
  타겟 범위: [0.0030, 0.0400]





=== 샘플 2 디버깅 ===
timekey_hr: 2025050307
actual_length: 50
연속형 데이터: torch.Size([50, 20])



Epoch 7 [Train]:  87%|████████▋ | 1486/1714 [00:14<00:02, 100.94it/s, Loss=0.0218, MAPE=74.92%]

  NaN: 0

[A


  Inf: 0
  범위: [0.0000, 4175.0000]
범주형 데이터: torch.Size([50, 4])
  값 범위: [9, 301]
  고유값: [9, 46, 56, 58, 60, 65, 66, 68, 78, 100, 145, 200, 218, 219, 221, 222, 232, 233, 234, 241, 242, 244, 255, 256, 276, 279, 284, 296, 298, 299, 300, 301]
임베딩 적용 중...
  임베딩 vocab_size: 307
  임베딩 embed_dim: 8





  임베딩 결과: torch.Size([50, 4, 8])

Epoch 7 [Train]:  87%|████████▋ | 1486/1714 [00:14<00:02, 100.94it/s, Loss=0.0152, MAPE=60.60%][A


  임베딩 NaN: 0
  임베딩 Inf: 0
  Flatten 후: torch.Size([50, 32])
  Flatten NaN: 0
최종 결합: torch.Size([50, 52])
  결합 NaN: 0
  결합 Inf: 0
  결합 범위: [-3.8325, 4175.0000]
타겟 데이터: torch.Size([50])
  타겟 NaN: 0

  타겟 범위: [0.0060, 0.0380]

2025-09-04 14:10:50,461 - INFO - Epoch   7: Train Loss=0.0239, Train MAPE=68148.45% (검증 생략)
Training Progress:  70%|███████   | 7/10 [02:07<00:54, 18.03s/it, T_Loss=0.0239, V_Loss=0.0202(이전값), V_MAPE=17262.90%, Best=0.0202, Patience=0/20]


=== 샘플 0 디버깅 ===
timekey_hr: 2025050307
actual_length: 50
연속형 데이터: torch.Size([50, 20])
  NaN: 0
  Inf: 0





  범위: [0.0000, 4175.0000]

Epoch 8 [Train]:  14%|█▍        | 239/1714 [00:02<00:14, 101.50it/s, Loss=0.0319, MAPE=55.30%]




[A

범주형 데이터: torch.Size([50, 4])
  값 범위: [1, 293]
  고유값: [1, 34, 56, 67, 78, 89, 100, 112, 200, 204, 214, 218, 219, 221, 222, 223, 224, 229, 241, 244, 245, 249, 255, 256, 279, 284, 287, 288, 289, 290, 291, 292, 293]
임베딩 적용 중...
  임베딩 vocab_size: 307
  임베딩 embed_dim: 8
  임베딩 결과: torch.Size([50, 4, 8])
  임베딩 NaN: 0
  임베딩 Inf: 0
  Flatten 후: torch.Size([50, 32])
  Flatten NaN: 0
최종 결합: torch.Size([50, 52])


Epoch 8 [Train]:  14%|█▍        | 239/1714 [00:02<00:14, 101.50it/s, Loss=0.0183, MAPE=59.58%]




[A

  결합 NaN: 0
  결합 Inf: 0
  결합 범위: [-2.9151, 4175.0000]
타겟 데이터: torch.Size([50])
  타겟 NaN: 0
  타겟 범위: [0.0030, 0.0400]





=== 샘플 2 디버깅 ===
timekey_hr: 2025050307
actual_length: 50
연속형 데이터: torch.Size([50, 20])
  NaN: 0
  Inf: 0



Epoch 8 [Train]:  48%|████▊     | 820/1714 [00:08<00:08, 102.20it/s, Loss=0.0193, MAPE=58.45%]

  범위: [0.0000, 4175.0000]

[A


범주형 데이터: torch.Size([50, 4])
  값 범위: [9, 301]
  고유값: [9, 46, 56, 58, 60, 65, 66, 68, 78, 100, 145, 200, 218, 219, 221, 222, 232, 233, 234, 241, 242, 244, 255, 256, 276, 279, 284, 296, 298, 299, 300, 301]
임베딩 적용 중...
  임베딩 vocab_size: 307
  임베딩 embed_dim: 8
  임베딩 결과: torch.Size([50, 4, 8])
  임베딩 NaN: 0
  임베딩 Inf: 0
  Flatten 후: torch.Size([50, 32])
  Flatten NaN: 0
최종 결합: torch.Size([50, 52])
  결합 NaN: 0





  결합 Inf: 0

Epoch 8 [Train]:  48%|████▊     | 820/1714 [00:08<00:08, 102.20it/s, Loss=0.0079, MAPE=59.60%]




[A

  결합 범위: [-3.8325, 4175.0000]
타겟 데이터: torch.Size([50])
  타겟 NaN: 0
  타겟 범위: [0.0060, 0.0380]





=== 샘플 1 디버깅 ===
timekey_hr: 2025050307
actual_length: 50
연속형 데이터: torch.Size([50, 20])
  NaN: 0
  Inf: 0
  범위: [0.0000, 4175.0000]
범주형 데이터: torch.Size([50, 4])


Epoch 8 [Train]:  82%|████████▏ | 1409/1714 [00:13<00:03, 101.37it/s, Loss=0.0191, MAPE=78.16%]




[A

  값 범위: [3, 296]
  고유값: [3, 4, 5, 9, 11, 12, 13, 19, 24, 25, 56, 67, 78, 89, 100, 145, 200, 221, 222, 233, 238, 241, 244, 256, 262, 268, 271, 276, 279, 284, 293, 294, 295, 296]
임베딩 적용 중...


Epoch 8 [Train]:  82%|████████▏ | 1409/1714 [00:13<00:03, 101.37it/s, Loss=0.0183, MAPE=82.70%]




[A

  임베딩 vocab_size: 307  임베딩 Inf: 0
  임베딩 embed_dim: 8
  임베딩 결과: torch.Size([50, 4, 8])
  임베딩 NaN: 0

  Flatten 후: torch.Size([50, 32])
  Flatten NaN: 0
최종 결합: torch.Size([50, 52])
  결합 NaN: 0
  결합 Inf: 0
  결합 범위: [-2.5095, 4175.0000]
타겟 데이터: torch.Size([50])
  타겟 NaN: 0
  타겟 범위: [0.0030, 0.0400]





=== 샘플 0 디버깅 ===
timekey_hr: 2025071400
actual_length: 50
연속형 데이터: torch.Size([50, 20])
  NaN: 0
  Inf: 0
  범위: [0.0000, 5749.0000]
범주형 데이터: torch.Size([50, 4])
  값 범위: [0, 289]
  고유값: [0, 1, 12, 23, 34, 45, 56, 67, 78, 89, 100, 111, 112, 134, 156, 167, 178, 189, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 222, 233, 244, 255, 266, 280, 286, 287, 288, 289]
임베딩 적용 중...
  임베딩 vocab_size: 307
  임베딩 embed_dim: 8
  임베딩 결과: torch.Size([50, 4, 8])
  임베딩 NaN: 0
  임베딩 Inf: 0
  Flatten 후: torch.Size([50, 32])
  Flatten NaN: 0
최종 결합: torch.Size([50, 52])
  결합 NaN: 0
  결합 Inf: 0
  결합 범위: [-2.7773, 5749.0000]
타겟 데이터: torch.Size([50])
  타겟 NaN: 0
  타겟 범위: [0.0150, 1.0080]

=== 샘플 1 디버깅 ===
timekey_hr: 2025071400
actual_length: 50
연속형 데이터: torch.Size([50, 20])
  NaN: 0
  Inf: 0
  범위: [0.0000, 4450.0000]범주형 데이터: torch.Size([50, 4])

  값 범위: [1, 291]
  고유값: [1, 12, 23, 34, 45, 56, 67, 78, 89, 145, 200, 208, 212, 213, 215, 216, 217, 218, 219, 221, 222, 223, 224, 225, 226, 227, 

2025-09-04 14:11:09,772 - INFO - Epoch   8: Train Loss=0.0238, Val Loss=0.0206, Train MAPE=64260.82%, Val MAPE=22810.22%
Training Progress:  80%|████████  | 8/10 [02:26<00:36, 18.43s/it, T_Loss=0.0238, V_Loss=0.0206(검증됨), V_MAPE=22810.22%, Best=0.0202, Patience=0/20]


=== 샘플 2 디버깅 ===
timekey_hr: 2025050307
actual_length: 50
연속형 데이터: torch.Size([50, 20])
  NaN: 0


Epoch 9 [Train]:  18%|█▊        | 304/1714 [00:03<00:13, 101.85it/s, Loss=0.0203, MAPE=59.44%]




[A

  Inf: 0





  범위: [0.0000, 4175.0000]

Epoch 9 [Train]:  18%|█▊        | 315/1714 [00:03<00:13, 100.58it/s, Loss=0.0203, MAPE=59.44%]


범주형 데이터: torch.Size([50, 4])

[A


  값 범위: [9, 301]
  고유값: [9, 46, 56, 58, 60, 65, 66, 68, 78, 100, 145, 200, 218, 219, 221, 222, 232, 233, 234, 241, 242, 244, 255, 256, 276, 279, 284, 296, 298, 299, 300, 301]
임베딩 적용 중...
  임베딩 vocab_size: 307
  임베딩 embed_dim: 8
  임베딩 결과: torch.Size([50, 4, 8])
  임베딩 NaN: 0
  임베딩 Inf: 0
  Flatten 후: torch.Size([50, 32])
  Flatten NaN: 0





최종 결합: torch.Size([50, 52])


Epoch 9 [Train]:  18%|█▊        | 315/1714 [00:03<00:13, 100.58it/s, Loss=0.0149, MAPE=67.48%]

  결합 NaN: 0


[A

  결합 Inf: 0
  결합 범위: [-3.8325, 4175.0000]
타겟 데이터: torch.Size([50])
  타겟 NaN: 0
  타겟 범위: [0.0060, 0.0380]





=== 샘플 1 디버깅 ===
timekey_hr: 2025050307
actual_length: 50
연속형 데이터: torch.Size([50, 20])
  NaN: 0
  Inf: 0





  범위: [0.0000, 4175.0000]

Epoch 9 [Train]:  20%|█▉        | 336/1714 [00:03<00:14, 95.39it/s, Loss=0.0330, MAPE=55.84%]




[A

범주형 데이터: torch.Size([50, 4])
  값 범위: [3, 296]
  고유값: [3, 4, 5, 9, 11, 12, 13, 19, 24, 25, 56, 67, 78, 89, 100, 145, 200, 221, 222, 233, 238, 241, 244, 256, 262, 268, 271, 276, 279, 284, 293, 294, 295, 296]
임베딩 적용 중...
  임베딩 vocab_size: 307
  임베딩 embed_dim: 8
  임베딩 결과: torch.Size([50, 4, 8])
  임베딩 NaN: 0
  임베딩 Inf: 0
  Flatten 후: torch.Size([50, 32])





  Flatten NaN: 0
최종 결합: torch.Size([50, 52])


Epoch 9 [Train]:  20%|█▉        | 336/1714 [00:03<00:14, 95.39it/s, Loss=0.0076, MAPE=63.79%]

  결합 NaN: 0

[A


  결합 Inf: 0
  결합 범위: [-2.5095, 4175.0000]
타겟 데이터: torch.Size([50])
  타겟 NaN: 0
  타겟 범위: [0.0030, 0.0400]





=== 샘플 0 디버깅 ===
timekey_hr: 2025050307
actual_length: 50
연속형 데이터: torch.Size([50, 20])
  NaN: 0
  Inf: 0



Epoch 9 [Train]:  58%|█████▊    | 993/1714 [00:09<00:07, 102.47it/s, Loss=0.1532, MAPE=54.32%]

  범위: [0.0000, 4175.0000]

[A


범주형 데이터: torch.Size([50, 4])
  값 범위: [1, 293]
  고유값: [1, 34, 56, 67, 78, 89, 100, 112, 200, 204, 214, 218, 219, 221, 222, 223, 224, 229, 241, 244, 245, 249, 255, 256, 279, 284, 287, 288, 289, 290, 291, 292, 293]
임베딩 적용 중...
  임베딩 vocab_size: 307
  임베딩 embed_dim: 8
  임베딩 결과: torch.Size([50, 4, 8])
  임베딩 NaN: 0
  임베딩 Inf: 0
  Flatten 후: torch.Size([50, 32])
  Flatten NaN: 0
최종 결합: torch.Size([50, 52])





  결합 NaN: 0


Epoch 9 [Train]:  58%|█████▊    | 993/1714 [00:09<00:07, 102.47it/s, Loss=0.0346, MAPE=58.47%]

  결합 Inf: 0


[A

  결합 범위: [-2.9151, 4175.0000]
타겟 데이터: torch.Size([50])
  타겟 NaN: 0
  타겟 범위: [0.0030, 0.0400]


2025-09-04 14:11:27,036 - INFO - Epoch   9: Train Loss=0.0237, Train MAPE=68876.10% (검증 생략)
Training Progress:  90%|█████████ | 9/10 [02:44<00:18, 18.07s/it, T_Loss=0.0237, V_Loss=0.0206(이전값), V_MAPE=22810.22%, Best=0.0202, Patience=1/20]


=== 샘플 1 디버깅 ===
timekey_hr: 2025050307
actual_length: 50
연속형 데이터: torch.Size([50, 20])
  NaN: 0
  Inf: 0





  범위: [0.0000, 4175.0000]

Epoch 10 [Train]:   4%|▎         | 61/1714 [00:00<00:16, 100.33it/s, Loss=0.0100, MAPE=57.07%]




[A

범주형 데이터: torch.Size([50, 4])
  값 범위: [3, 296]
  고유값: [3, 4, 5, 9, 11, 12, 13, 19, 24, 25, 56, 67, 78, 89, 100, 145, 200, 221, 222, 233, 238, 241, 244, 256, 262, 268, 271, 276, 279, 284, 293, 294, 295, 296]
임베딩 적용 중...
  임베딩 vocab_size: 307
  임베딩 embed_dim: 8
  임베딩 결과: torch.Size([50, 4, 8])
  임베딩 NaN: 0
  임베딩 Inf: 0
  Flatten 후: torch.Size([50, 32])
  Flatten NaN: 0
최종 결합: torch.Size([50, 52])
  결합 NaN: 0





  결합 Inf: 0

Epoch 10 [Train]:   4%|▎         | 61/1714 [00:00<00:16, 100.33it/s, Loss=0.0223, MAPE=67.05%]




[A

  결합 범위: [-2.5095, 4175.0000]
타겟 데이터: torch.Size([50])
  타겟 NaN: 0
  타겟 범위: [0.0030, 0.0400]





=== 샘플 0 디버깅 ===
timekey_hr: 2025050307
actual_length: 50
연속형 데이터: torch.Size([50, 20])
  NaN: 0
  Inf: 0




  범위: [0.0000, 4175.0000]
범주형 데이터: torch.Size([50, 4])
  값 범위: [1, 293]
  고유값: [1, 34, 56, 67, 78, 89, 100, 112, 200, 204, 214, 218, 219, 221, 222, 223, 224, 229, 241, 244, 245, 249, 255, 256, 279, 284, 287, 288, 289, 290, 291, 292, 293]
임베딩 적용 중...
  임베딩 vocab_size: 307
  임베딩 embed_dim: 8





  임베딩 결과: torch.Size([50, 4, 8])


Epoch 10 [Train]:  44%|████▍     | 753/1714 [00:07<00:09, 102.07it/s, Loss=0.0141, MAPE=52.98%]

  임베딩 NaN: 0


[A

  임베딩 Inf: 0
  Flatten 후: torch.Size([50, 32])
  Flatten NaN: 0
최종 결합: torch.Size([50, 52])
  결합 NaN: 0
  결합 Inf: 0
  결합 범위: [-2.9151, 4175.0000]
타겟 데이터: torch.Size([50])





  타겟 NaN: 0
  타겟 범위: [0.0030, 0.0400]

Epoch 10 [Train]:  44%|████▍     | 753/1714 [00:07<00:09, 102.07it/s, Loss=0.0520, MAPE=523899.81%][A







=== 샘플 2 디버깅 ===
timekey_hr: 2025050307
actual_length: 50





연속형 데이터: torch.Size([50, 20])

Epoch 10 [Train]:  85%|████████▍ | 1456/1714 [00:14<00:02, 102.40it/s, Loss=0.0216, MAPE=54.44%]




[A

  NaN: 0
  Inf: 0
  범위: [0.0000, 4175.0000]
범주형 데이터: torch.Size([50, 4])
  값 범위: [9, 301]
  고유값: [9, 46, 56, 58, 60, 65, 66, 68, 78, 100, 145, 200, 218, 219, 221, 222, 232, 233, 234, 241, 242, 244, 255, 256, 276, 279, 284, 296, 298, 299, 300, 301]
임베딩 적용 중...
  임베딩 vocab_size: 307
  임베딩 embed_dim: 8
  임베딩 결과: torch.Size([50, 4, 8])
  임베딩 NaN: 0
  임베딩 Inf: 0
  Flatten 후: torch.Size([50, 32])


Epoch 10 [Train]:  85%|████████▍ | 1456/1714 [00:14<00:02, 102.40it/s, Loss=0.0239, MAPE=58.39%]




[A

  Flatten NaN: 0
최종 결합: torch.Size([50, 52])
  결합 NaN: 0
  결합 Inf: 0
  결합 범위: [-3.8325, 4175.0000]
타겟 데이터: torch.Size([50])
  타겟 NaN: 0
  타겟 범위: [0.0060, 0.0380]





=== 샘플 0 디버깅 ===
timekey_hr: 2025071400
actual_length: 50
연속형 데이터: torch.Size([50, 20])
  NaN: 0
  Inf: 0
  범위: [0.0000, 5749.0000]
범주형 데이터: torch.Size([50, 4])
  값 범위: [0, 289]
  고유값: [0, 1, 12, 23, 34, 45, 56, 67, 78, 89, 100, 111, 112, 134, 156, 167, 178, 189, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 222, 233, 244, 255, 266, 280, 286, 287, 288, 289]
임베딩 적용 중...
  임베딩 vocab_size: 307  임베딩 embed_dim: 8

  임베딩 결과: torch.Size([50, 4, 8])
  임베딩 NaN: 0
  임베딩 Inf: 0
  Flatten 후: torch.Size([50, 32])
  Flatten NaN: 0
최종 결합: torch.Size([50, 52])
  결합 NaN: 0
  결합 Inf: 0
  결합 범위: [-2.7773, 5749.0000]
타겟 데이터: torch.Size([50])
  타겟 NaN: 0
  타겟 범위: [0.0150, 1.0080]

=== 샘플 1 디버깅 ===
timekey_hr: 2025071400
actual_length: 50
연속형 데이터: torch.Size([50, 20])
  NaN: 0
  Inf: 0
  범위: [0.0000, 4450.0000]
범주형 데이터: torch.Size([50, 4])
  값 범위: [1, 291]
  고유값: [1, 12, 23, 34, 45, 56, 67, 78, 89, 145, 200, 208, 212, 213, 215, 216, 217, 218, 219, 221, 222, 223, 224, 225, 226, 227, 

2025-09-04 14:11:46,228 - INFO - Epoch  10: Train Loss=0.0234, Val Loss=0.0204, Train MAPE=64784.27%, Val MAPE=20515.76%
Training Progress: 100%|██████████| 10/10 [03:03<00:00, 18.32s/it, T_Loss=0.0234, V_Loss=0.0204(검증됨), V_MAPE=20515.76%, Best=0.0202, Patience=1/20]
2025-09-04 14:11:46,231 - INFO - 훈련 완료, 테스트 시작...
2025-09-04 14:11:46,231 - INFO - 모델 로드: 250904_test/lstm_20250904_135229.pth
2025-09-04 14:11:46,246 - INFO - 테스트 시작
Testing:   0%|          | 0/200 [00:00<?, ?it/s]


=== 샘플 0 디버깅 ===
timekey_hr: 2025072300
actual_length: 50
연속형 데이터: torch.Size([50, 20])
  NaN: 0
  Inf: 0
  범위: [0.0000, 6911.0000]
범주형 데이터: torch.Size([50, 4])
  값 범위: [0, 289]
  고유값: [0, 1, 12, 23, 34, 45, 56, 67, 78, 89, 100, 111, 112, 134, 156, 167, 178, 189, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 222, 233, 244, 255, 266, 282, 286, 287, 288, 289]
임베딩 적용 중...
  임베딩 vocab_size: 307
  임베딩 embed_dim: 8
  임베딩 결과: torch.Size([50, 4, 8])
  임베딩 NaN: 0
  임베딩 Inf: 0
  Flatten 후: torch.Size([50, 32])
  Flatten NaN: 0
최종 결합: torch.Size([50, 52])
  결합 NaN: 0
  결합 Inf: 0  결합 범위: [-2.2500, 6911.0000]

타겟 데이터: torch.Size([50])
  타겟 NaN: 0
  타겟 범위: [0.0130, 1.2450]

=== 샘플 1 디버깅 ===
timekey_hr: 2025072300
actual_length: 50
연속형 데이터: torch.Size([50, 20])
  NaN: 0
  Inf: 0
  범위: [0.0000, 4500.0000]

범주형 데이터: torch.Size([50, 4])  값 범위: [1, 291]
  고유값: [1, 12, 23, 34, 45, 56, 67, 78, 89, 145, 200, 208, 212, 213, 215, 216, 217, 218, 219, 221, 222, 223, 224, 225, 226, 227, 

Testing: 100%|██████████| 200/200 [00:03<00:00, 59.86it/s]
2025-09-04 14:11:49,611 - INFO - 테스트 결과: RMSE=0.1503, MAE=0.0523, MAPE=8027.00%
2025-09-04 14:11:49,611 - INFO - 구조화된 예측 결과: 157,944개
2025-09-04 14:11:49,613 - INFO - 결과 저장 완료: 250904_test/lstm_20250904_135229_results.json
2025-09-04 14:11:51,202 - INFO - 구조화된 예측 결과 저장:
2025-09-04 14:11:51,203 - INFO -   - 파일 경로: 250904_test/lstm_20250904_135229_predictions.csv
2025-09-04 14:11:51,203 - INFO -   - 저장된 예측 개수: 157,944개
2025-09-04 14:11:51,205 - INFO -   - 고유한 timekey_hr: 199개
2025-09-04 14:11:51,215 - INFO -   - 고유한 oper_id: 813개
2025-09-04 14:11:51,228 - INFO -   - 고유한 first_oper_id: 255개
2025-09-04 14:11:51,236 - INFO -   - 고유한 last_oper_id: 249개
2025-09-04 14:11:51,237 - INFO -   - 평균 윈도우 길이: 49.7
2025-09-04 14:11:51,238 - INFO - 실험 완료: lstm_20250904_135229
2025-09-04 14:11:51,239 - INFO - 모델 타입: lstm
2025-09-04 14:11:51,239 - INFO - 테스트 RMSE: 0.1503
2025-09-04 14:11:51,240 - INFO - 테스트 MAE: 0.0523
2025-09-04 14:11:51,240 - IN