In [None]:
def _build_gluonts_entry(group, group_df, static_cat, start_date,
                         real_cols, cat_cols, length=None):
    """Build one GluonTS data entry (a plain dict) for a single series.

    Args:
        group: Series identifier; stored as ``str(group)`` under ITEM_ID.
        group_df: Rows of this series, already merged with the calendar.
        static_cat: Integer code of the series, stored as the only static
            categorical feature.
        start_date: Timestamp used as the series start.
        real_cols: Column names stacked into FEAT_DYNAMIC_REAL.
        cat_cols: Column names stacked into FEAT_DYNAMIC_CAT.
        length: Keep only the first ``length`` rows; ``None`` keeps all rows.

    Returns:
        dict keyed by ``FieldName`` constants. Feature arrays are transposed
        so that rows are features and columns are time steps.
    """
    window = slice(None, length)
    return {
        FieldName.ITEM_ID: str(group),
        FieldName.TARGET: group_df['sales_sum'].values[window],
        FieldName.START: start_date,
        FieldName.FEAT_STATIC_CAT: [static_cat],
        FieldName.FEAT_DYNAMIC_REAL: group_df[real_cols].values[window].T,
        FieldName.FEAT_DYNAMIC_CAT: group_df[cat_cols].values[window].T,
    }


def prepare_datasets(save_dir='../dataset/', level_numbers=None):
    """Build and pickle GluonTS train/test datasets per aggregation level.

    For every requested level this reads
    ``../data/preprocessed/agg_df_level_{n}.csv``, joins it with
    ``../data/preprocessed/calendar_df.csv`` on the date column ``d``, builds
    one entry per series id and writes
    ``{save_dir}/dataset_level_{n}.pkl`` containing
    ``{'train': ListDataset, 'test': ListDataset}``.

    Args:
        save_dir: Directory the pickles are written to (created if missing).
        level_numbers: Optional iterable of 1-based level numbers (1..12) to
            build. ``None`` (default) builds all 12 levels, which is what
            ``train_models`` expects to find on disk.

    Raises:
        ValueError: If ``level_numbers`` contains a value outside 1..12.
    """
    os.makedirs(save_dir, exist_ok=True)

    levels = [
        [],                        # Level 1: Total
        ['state_id'],              # Level 2: State
        ['store_id'],              # Level 3: Store
        ['cat_id'],                # Level 4: Category
        ['dept_id'],               # Level 5: Department
        ['state_id', 'cat_id'],    # Level 6: State-Category
        ['state_id', 'dept_id'],   # Level 7: State-Department
        ['store_id', 'cat_id'],    # Level 8: Store-Category
        ['store_id', 'dept_id'],   # Level 9: Store-Department
        ['item_id'],               # Level 10: Item
        ['item_id', 'state_id'],   # Level 11: Item-State
        ['item_id', 'store_id'],   # Level 12: Individual
    ]

    if level_numbers is None:
        level_numbers = range(1, len(levels) + 1)
    else:
        level_numbers = list(level_numbers)
        invalid = [n for n in level_numbers if not 1 <= n <= len(levels)]
        if invalid:
            raise ValueError(
                f"level_numbers must be within 1..{len(levels)}, got {invalid}"
            )

    # Real-valued covariates passed as FEAT_DYNAMIC_REAL (32 columns).
    dynamic_real_cols = [
        "sales_mean", "sales_std", "sales_max", "sales_min",
        "sales_lag1", "sales_lag7", "sales_lag28",
        "sales_rolling7_mean", "sales_rolling28_mean",
        "sales_trend",
        "release_mean", "out_of_stock_mean",
        "sell_price_mean", "sell_price_std", "sell_price_max", "sell_price_min",
        "sell_price_diff", "sell_price_trend", "sell_price_in_store_mean",
        "snap_CA", "snap_TX", "snap_WI",
        "year_delta", "quarter_sin", "quarter_cos", "month_sin", "month_cos",
        "day_sin", "day_cos", "weekday_sin", "weekday_cos",
        'event_count',
    ]
    # Categorical covariates passed as FEAT_DYNAMIC_CAT (7 columns).
    dynamic_cat_cols = [
        'snap_CA', 'snap_TX', 'snap_WI',
        'event_name_1_enc', 'event_name_2_enc',
        'event_type_1_enc', 'event_type_2_enc',
    ]

    # The train split keeps the first 1913 days; the test split keeps every row.
    train_length = 1913
    start_date = pd.to_datetime('2011-01-29')

    # The calendar is identical for every level, so load and convert it once.
    calendar_df = pd.read_csv('../data/preprocessed/calendar_df.csv')
    calendar_df = save_memory(calendar_df)
    # 'd' is split on '_' and its second part is read as a 1-based day number;
    # turn it into a real date counted from start_date.
    calendar_df['d'] = calendar_df['d'].apply(lambda x: int(x.split('_')[1]) - 1)
    calendar_df['d'] = start_date + pd.to_timedelta(calendar_df['d'], unit='D')

    for level_no in level_numbers:
        level = levels[level_no - 1]

        highlight_print(f"Preparing dataset for level {level_no}")

        # Load the aggregated sales of this level and shrink its memory use.
        agg_df = pd.read_csv(f'../data/preprocessed/agg_df_level_{level_no}.csv')
        agg_df = save_memory(agg_df)

        # Same day-label -> date conversion as for the calendar.
        agg_df['d'] = agg_df['d'].apply(lambda x: int(x.split('_')[1]) - 1)
        agg_df['d'] = start_date + pd.to_timedelta(agg_df['d'], unit='D')

        # Build the series id from the grouping columns of this level.
        if not level:
            agg_df.insert(1, 'id', 'total')
        elif len(level) == 1:
            agg_df.insert(1, 'id', agg_df[level[0]])
        else:
            agg_df.insert(1, 'id', agg_df[level[0]] + '_' + agg_df[level[1]])

        # Integer code per series, in order of first appearance.
        groups = agg_df['id'].unique()
        group_encoder = {group: group_idx for group_idx, group in enumerate(groups)}

        # Build the GluonTS entries. A single groupby pass replaces one full
        # boolean scan of agg_df per series.
        train_data = []
        test_data = []
        grouped = agg_df.groupby('id', sort=False, observed=True)
        for idx, (group, group_df) in enumerate(grouped):
            print(f'{idx}/{len(groups)}')
            group_df = group_df.merge(calendar_df, on="d", how="left")
            group_df = save_memory(group_df)

            static_cat = group_encoder[group]
            train_data.append(_build_gluonts_entry(
                group, group_df, static_cat, start_date,
                dynamic_real_cols, dynamic_cat_cols, length=train_length,
            ))
            test_data.append(_build_gluonts_entry(
                group, group_df, static_cat, start_date,
                dynamic_real_cols, dynamic_cat_cols,
            ))

        datasets = {
            'train': ListDataset(train_data, freq="D"),
            'test': ListDataset(test_data, freq="D"),
        }

        with open(os.path.join(save_dir, f'dataset_level_{level_no}.pkl'), 'wb') as f:
            pickle.dump(datasets, f)

In [None]:
def train_models(epochs, lr, batch_size,
                 quantiles=[0.005, 0.025, 0.165, 0.25, 0.5, 0.75, 0.835, 0.975, 0.995],
                 save_dir='../result/test/'):
    """Train, evaluate and persist one TFT model per aggregation level (1..12).

    For each level the pickled dataset ``../dataset/dataset_level_{n}.pkl`` is
    loaded, a ``TemporalFusionTransformerEstimator`` is trained on its train
    split, forecasts are produced on its test split, and the quantile
    forecasts plus aggregate metrics are written to ``{save_dir}/level_{n}/``.
    The run's hyperparameters are finally written to
    ``{save_dir}/model_params.json``.

    Args:
        epochs: Passed to the trainer as ``max_epochs``.
        lr: Learning rate passed to the estimator.
        batch_size: Batch size passed to the estimator.
        quantiles: Quantile levels to train, forecast and evaluate. The
            default list is never mutated by this function.
        save_dir: Root output directory (created if missing).

    Returns:
        None. All results are written to disk.
    """
    # Output root.
    os.makedirs(save_dir, exist_ok=True)

    dataset_dir = '../dataset/'
    level_numbers = range(1, 13)
    prediction_length = 28

    # Build the datasets if any expected pickle is missing. Checking the
    # files themselves is robust against stray entries in the directory.
    dataset_paths = {
        level_no: os.path.join(dataset_dir, f'dataset_level_{level_no}.pkl')
        for level_no in level_numbers
    }
    if not all(os.path.exists(path) for path in dataset_paths.values()):
        prepare_datasets(dataset_dir)

    # Train one TFT model per level. Each dataset is loaded right before use
    # so only one level is held in memory at a time.
    for level_no in level_numbers:
        highlight_print(f"Loading dataset for level {level_no}")
        # NOTE: pickle executes arbitrary code on load; only load files
        # produced by prepare_datasets.
        with open(dataset_paths[level_no], 'rb') as f:
            dataset = pickle.load(f)
        train_dataset = dataset['train']
        test_dataset = dataset['test']

        highlight_print(f"Training model for level {level_no}")

        # Per-level output directory.
        level_dir = os.path.join(save_dir, f'level_{level_no}')
        os.makedirs(level_dir, exist_ok=True)

        # Each series carries one static categorical code (its index), so the
        # cardinality equals the number of series in the dataset.
        num_series = len(train_dataset)

        # Model definition.
        # NOTE(review): both callbacks monitor 'val_loss', but train() below
        # receives no validation data - confirm that 'val_loss' is actually
        # logged, otherwise early stopping/checkpointing may never trigger.
        estimator = TemporalFusionTransformerEstimator(
            freq="D",
            context_length=28,
            prediction_length=prediction_length,
            quantiles=quantiles,

            hidden_dim=64,
            num_heads=4,
            dropout_rate=0.1,

            static_cardinalities=[num_series],
            dynamic_dims=[32],
            dynamic_cardinalities=[2, 2, 2, 31, 31, 5, 5],

            trainer_kwargs={
                "max_epochs": epochs,
                "accelerator": "auto",
                'callbacks': [
                    EarlyStopping(
                        monitor='val_loss',
                        patience=5
                    ),
                    # Keep only the best checkpoint of this level.
                    ModelCheckpoint(
                        dirpath=level_dir,
                        filename='checkpoint-{epoch:02d}-{val_loss:.4f}',
                        monitor='val_loss',
                        mode='min',
                        save_top_k=1,
                        save_last=False
                    ),
                ]
            },
            lr=lr,
            batch_size=batch_size,

            validation_sampler=ValidationSplitSampler(min_future=prediction_length)
        )

        # Training.
        predictor = estimator.train(train_dataset)

        # Forecasting on the test split.
        forecast_it, test_it = make_evaluation_predictions(
            dataset=test_dataset,
            predictor=predictor,
        )
        forecasts = list(forecast_it)
        tests = list(test_it)

        # Quantile forecasts keyed by item id, then by quantile level.
        level_pred = {
            forecast.item_id: {
                quantile: forecast.quantile(quantile) for quantile in quantiles
            }
            for forecast in forecasts
        }

        # Evaluation; only the aggregate metrics are persisted.
        evaluator = Evaluator(quantiles=quantiles)
        agg_metrics, _ = evaluator(tests, forecasts)

        # Persist forecasts.
        with open(os.path.join(level_dir, 'predictions.pkl'), 'wb') as f:
            pickle.dump(level_pred, f)

        # Persist metrics. agg_metrics maps metric name -> scalar, so wrap it
        # in a list to get a one-row frame (a bare dict of scalars makes the
        # DataFrame constructor raise).
        metrics_df = pd.DataFrame([agg_metrics]).round(4)
        metrics_df.to_csv(os.path.join(level_dir, 'metrics.csv'))

    # Persist the hyperparameters of this run.
    with open(os.path.join(save_dir, 'model_params.json'), 'w') as f:
        model_params = {
            'epochs': epochs,
            'learning_rate': lr,
            'batch_size': batch_size
        }
        json.dump(model_params, f, indent=4)


# Single-epoch run of the whole pipeline; results are written under
# ../result/test/prac.
train_models(
    epochs=1,
    lr=1e-1,
    batch_size=128,
    save_dir='../result/test/prac',
)