In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from tqdm.notebook import tqdm
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from types import SimpleNamespace
from sklearn.preprocessing import MinMaxScaler
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts
from torch.cuda.amp import autocast, GradScaler
import os
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Colab bootstrap: mount Google Drive, then make the project folder importable
# and the current working directory (later cells use paths relative to it).
import os
import sys

from google.colab import drive

PROJECT_DIR = '/content/drive/MyDrive/dacon'

drive.mount('/content/drive')

sys.path.insert(0, PROJECT_DIR)
os.chdir(PROJECT_DIR)

## Hyperparameter Setting

In [None]:
# Hyperparameters, available both as a plain dict (`config`) and through
# attribute access (`CFG.learning_rate`, `CFG.batch_size`, ...).
config = dict(
    learning_rate=0.001,
    weight_decay=0.001,
    epoch=50,
    batch_size=64,
    hidden_size=512,
    num_layers=3,
    output_size=3,
)

CFG = SimpleNamespace(**config)

# Item names handled by the pipeline, in processing order.
품목_리스트 = [
    '건고추', '사과', '감자', '배', '깐마늘(국산)',
    '무', '상추', '배추', '양파', '대파',
]

## Define Function for Feature Engineering


*  타겟의 필터 조건을 제외한 메타데이터의 필터 조건은 참가자들 각자의 기준에 맞춰 자유롭게 사용가능

* 밑의 필터 조건은 임의로 제공하는 예시



In [None]:
# Preprocessing for a single item:
#   1. keep only the target rows of the item,
#   2. replace zero prices by the midrange (min + max) / 2 of the non-zero
#      prices that share the same variety / trade unit / grade,
#   3. first-difference the target price and min-max scale all numeric columns.

# Target and metadata filter conditions per item.
# 'target' builds a boolean row mask on the raw frame; '공판장' / '도매' map a
# column name to its accepted values (None means the item has no metadata filter).
_ITEM_FILTERS = {
    '감자': {
        'target': lambda df: (df['품종명'] == '감자 수미') & (df['거래단위'] == '20키로상자') & (df['등급'] == '상'),
        '공판장': {'공판장명': ['*전국농협공판장'], '품목명': ['감자'], '품종명': ['수미'], '등급명': ['상']},
        '도매': {'시장명': ['*전국도매시장'], '품목명': ['감자'], '품종명': ['수미']}
    },
    '건고추': {
        'target': lambda df: (df['품종명'] == '화건') & (df['거래단위'] == '30 kg') & (df['등급'] == '상품'),
        '공판장': None,
        '도매': None
    },
    '깐마늘(국산)': {
        'target': lambda df: (df['거래단위'] == '20 kg') & (df['등급'] == '상품'),
        '공판장': {'공판장명': ['*전국농협공판장'], '품목명': ['마늘'], '품종명': ['깐마늘'], '등급명': ['상']},
        '도매': {'시장명': ['*전국도매시장'], '품목명': ['마늘'], '품종명': ['깐마늘']}
    },
    '대파': {
        'target': lambda df: (df['품종명'] == '대파(일반)') & (df['거래단위'] == '1키로단') & (df['등급'] == '상'),
        '공판장': {'공판장명': ['*전국농협공판장'], '품목명': ['대파'], '품종명': ['대파(일반)'], '등급명': ['상']},
        '도매': {'시장명': ['*전국도매시장'], '품목명': ['대파'], '품종명': ['대파(일반)']}
    },
    '무': {
        'target': lambda df: (df['거래단위'] == '20키로상자') & (df['등급'] == '상'),
        '공판장': {'공판장명': ['*전국농협공판장'], '품목명': ['무'], '품종명': ['기타무'], '등급명': ['상']},
        '도매': {'시장명': ['*전국도매시장'], '품목명': ['무'], '품종명': ['무']}
    },
    '배추': {
        'target': lambda df: (df['거래단위'] == '10키로망대') & (df['등급'] == '상'),
        '공판장': {'공판장명': ['*전국농협공판장'], '품목명': ['배추'], '품종명': ['쌈배추'], '등급명': ['상']},
        '도매': {'시장명': ['*전국도매시장'], '품목명': ['배추'], '품종명': ['배추']}
    },
    '사과': {
        'target': lambda df: (df['품종명'].isin(['홍로', '후지'])) & (df['거래단위'] == '10 개') & (df['등급'] == '상품'),
        '공판장': {'공판장명': ['*전국농협공판장'], '품목명': ['사과'], '품종명': ['후지'], '등급명': ['상']},
        '도매': {'시장명': ['*전국도매시장'], '품목명': ['사과'], '품종명': ['후지']}
    },
    '상추': {
        'target': lambda df: (df['품종명'] == '청') & (df['거래단위'] == '100 g') & (df['등급'] == '상품'),
        '공판장': {'공판장명': ['*전국농협공판장'], '품목명': ['상추'], '품종명': ['청상추'], '등급명': ['상']},
        '도매': {'시장명': ['*전국도매시장'], '품목명': ['상추'], '품종명': ['청상추']}
    },
    '양파': {
        'target': lambda df: (df['품종명'] == '양파') & (df['거래단위'] == '1키로') & (df['등급'] == '상'),
        '공판장': {'공판장명': ['*전국농협공판장'], '품목명': ['양파'], '품종명': ['기타양파'], '등급명': ['상']},
        '도매': {'시장명': ['*전국도매시장'], '품목명': ['양파'], '품종명': ['양파(일반)']}
    },
    '배': {
        'target': lambda df: (df['품종명'] == '신고') & (df['거래단위'] == '10 개') & (df['등급'] == '상품'),
        '공판장': {'공판장명': ['*전국농협공판장'], '품목명': ['배'], '품종명': ['신고'], '등급명': ['상']},
        '도매': {'시장명': ['*전국도매시장'], '품목명': ['배'], '품종명': ['신고']}
    }
}


def _fill_zero_with_group_midrange(frame, column, group_keys=('품종명', '거래단위', '등급')):
    """Return ``frame[column]`` with zeros replaced by the group midrange.

    Rows are grouped by ``group_keys``; a zero is replaced by
    ``(min + max) / 2`` of the non-zero values of its group. A zero is left
    untouched when its group has no usable non-zero value.
    """
    values = frame[column]
    non_zero = values.where(values != 0)  # zeros become NaN so min/max skip them
    grouped = non_zero.groupby([frame[key] for key in group_keys])
    midrange = (grouped.transform('min') + grouped.transform('max')) / 2
    return values.mask((values == 0) & midrange.notna(), midrange)


def _select_rows(frame, allowed_values):
    """Keep the rows whose value in every listed column is one of the accepted values."""
    for column, accepted in allowed_values.items():
        frame = frame[frame[column].isin(accepted)]
    return frame


def _fill_meta_zero_prices(frame):
    """Drop rows without incoming volume and repair zero reference prices.

    Zero previous-period prices are replaced by the current average price
    shifted by 1 / 3 / 36 rows (presumably one 순, one month and one year of
    consecutive rows - confirm against the data layout). A zero common-year
    price is then replaced by the (already repaired) previous-year price.
    """
    frame = frame.loc[frame['총반입량(kg)'] > 0].copy()
    current_price = frame['평균가(원/kg)']
    lagged_fallbacks = (
        ('전순 평균가격(원) PreVious SOON', 1),
        ('전달 평균가격(원) PreVious MMonth', 3),
        ('전년 평균가격(원) PreVious YeaR', 36),
    )
    for column, lag in lagged_fallbacks:
        frame[column] = frame[column].mask(frame[column] == 0, current_price.shift(lag))

    # The fallback has to land in the common-year column itself; writing it
    # to any other label only creates a stray column and leaves the zeros.
    common_year = '평년 평균가격(원) Common Year SOON'
    frame[common_year] = frame[common_year].mask(
        frame[common_year] == 0, frame['전년 평균가격(원) PreVious YeaR']
    )
    return frame


def process_data(raw_file, 산지공판장_file, 전국도매_file, 품목명, scaler=None, last_observed_value=None):
    """Load, filter, repair, difference and scale the data of one item.

    Args:
        raw_file: CSV with the target price rows.
        산지공판장_file: CSV with the joint-market metadata.
        전국도매_file: CSV with the wholesale metadata.
        품목명: Item name; must be a key of ``_ITEM_FILTERS``.
        scaler: Fitted scaler exposing ``transform``. When ``None`` a new
            ``MinMaxScaler`` is fitted on this data.
        last_observed_value: Ignored; kept for signature compatibility. The
            value is always recomputed from the data.

    Returns:
        Tuple ``(frame, scaler, last_observed_value)``: ``frame`` holds '시점'
        plus the scaled numeric columns, where '평균가격(원)' is the
        first-differenced price; ``last_observed_value`` is the last
        undifferenced, unscaled '평균가격(원)'.

    Raises:
        KeyError: ``품목명`` has no filter definition.
        ValueError: no row matches the target filter of the item.
    """
    raw_data = pd.read_csv(raw_file)
    산지공판장 = pd.read_csv(산지공판장_file)
    전국도매 = pd.read_csv(전국도매_file)

    item_filters = _ITEM_FILTERS[품목명]

    # Target rows. Work on an explicit copy so the column assignments below
    # never write through to (or silently miss) the raw frame.
    raw_품목 = raw_data[raw_data['품목명'] == 품목명]
    filtered_data = raw_품목[item_filters['target'](raw_품목)].copy()
    if filtered_data.empty:
        raise ValueError(f"no target rows for item {품목명!r} in {raw_file!r}")

    # Repair zero prices in the current and the common-year price columns.
    for price_column in ('평균가격(원)', '평년 평균가격(원)'):
        filtered_data[price_column] = _fill_zero_with_group_midrange(filtered_data, price_column)

    # Keep the last real price so callers can undo the differencing.
    last_observed_value = filtered_data['평균가격(원)'].iloc[-1]

    # First-order differencing; the leading NaN takes the next difference.
    filtered_data['평균가격(원)'] = filtered_data['평균가격(원)'].diff().bfill()

    # Metadata frames are cleaned the same way.
    # NOTE(review): neither frame is merged into the returned data, so they
    # currently have no effect on the model input.
    if item_filters['공판장']:
        filtered_공판장 = _fill_meta_zero_prices(_select_rows(산지공판장, item_filters['공판장']))

    if item_filters['도매']:
        filtered_도매 = _fill_meta_zero_prices(_select_rows(전국도매, item_filters['도매']))

    # Keep the time label plus every numeric column; remaining gaps become 0.
    numeric_columns = filtered_data.select_dtypes(include=[np.number]).columns
    filtered_data = filtered_data[['시점'] + list(numeric_columns)].copy()
    filtered_data[numeric_columns] = filtered_data[numeric_columns].fillna(0)

    # Scale: fit on this data when no scaler is supplied, otherwise reuse it.
    if scaler is None:
        scaler = MinMaxScaler()
        filtered_data[numeric_columns] = scaler.fit_transform(filtered_data[numeric_columns])
    else:
        filtered_data[numeric_columns] = scaler.transform(filtered_data[numeric_columns])

    return filtered_data, scaler, last_observed_value

In [None]:
#

In [None]:
# # Let's enhance the `process_data` function to reflect the requested changes.
# # 1. Fill '평년 평균가격(원)' with '평균가격(원)' when it's 0 in train.
# # 2. In 공판장 and 도매 data, fill '평년 평균가격(원) Common Year SOON' with '전년 평균가격(원) PreVious YeaR' when it's 0.
# # 3. Apply `bfill` and `ffill` to price-related data when the filtered data has 0 values.

# def process_data(raw_file, 산지공판장_file, 전국도매_file, 품목명, scaler=None, is_train=True):
#     raw_data = pd.read_csv(raw_file)
#     산지공판장 = pd.read_csv(산지공판장_file)
#     전국도매 = pd.read_csv(전국도매_file)

# # 타겟 및 메타데이터 필터 조건 정의
#     conditions = {
#     '감자': {
#         'target': lambda df: (df['품종명'] == '감자 수미') & (df['거래단위'] == '20키로상자') & (df['등급'] == '상'),
#         '공판장': {'공판장명': ['*전국농협공판장'], '품목명': ['감자'], '품종명': ['수미'], '등급명': ['상']},
#         '도매': {'시장명': ['*전국도매시장'], '품목명': ['감자'], '품종명': ['수미']}
#     },
#     '건고추': {
#         'target': lambda df: (df['품종명'] == '화건') & (df['거래단위'] == '30 kg') & (df['등급'] == '상품'),
#         '공판장': None,
#         '도매': None
#     },
#     '깐마늘(국산)': {
#         'target': lambda df: (df['거래단위'] == '20 kg') & (df['등급'] == '상품'),
#         '공판장': {'공판장명': ['*전국농협공판장'], '품목명': ['마늘'], '품종명': ['깐마늘'], '등급명': ['상']},
#         '도매': {'시장명': ['*전국도매시장'], '품목명': ['마늘'], '품종명': ['깐마늘']}
#     },
#     '대파': {
#         'target': lambda df: (df['품종명'] == '대파(일반)') & (df['거래단위'] == '1키로단') & (df['등급'] == '상'),
#         '공판장': {'공판장명': ['*전국농협공판장'], '품목명': ['대파'], '품종명': ['대파(일반)'], '등급명': ['상']},
#         '도매': {'시장명': ['*전국도매시장'], '품목명': ['대파'], '품종명': ['대파(일반)']}
#     },
#     '무': {
#         'target': lambda df: (df['거래단위'] == '20키로상자') & (df['등급'] == '상'),
#         '공판장': {'공판장명': ['*전국농협공판장'], '품목명': ['무'], '품종명': ['기타무'], '등급명': ['상']},
#         '도매': {'시장명': ['*전국도매시장'], '품목명': ['무'], '품종명': ['무']}
#     },
#     '배추': {
#         'target': lambda df: (df['거래단위'] == '10키로망대') & (df['등급'] == '상'),
#         '공판장': {'공판장명': ['*전국농협공판장'], '품목명': ['배추'], '품종명': ['쌈배추'], '등급명': ['상']},
#         '도매': {'시장명': ['*전국도매시장'], '품목명': ['배추'], '품종명': ['배추']}
#     },
#     '사과': {
#         'target': lambda df: (df['품종명'].isin(['홍로', '후지'])) & (df['거래단위'] == '10 개') & (df['등급'] == '상품'),
#         '공판장': {'공판장명': ['*전국농협공판장'], '품목명': ['사과'], '품종명': ['후지'], '등급명': ['상']},
#         '도매': {'시장명': ['*전국도매시장'], '품목명': ['사과'], '품종명': ['후지']}
#     },
#     '상추': {
#         'target': lambda df: (df['품종명'] == '청') & (df['거래단위'] == '100 g') & (df['등급'] == '상품'),
#         '공판장': {'공판장명': ['*전국농협공판장'], '품목명': ['상추'], '품종명': ['청상추'], '등급명': ['상']},
#         '도매': {'시장명': ['*전국도매시장'], '품목명': ['상추'], '품종명': ['청상추']}
#     },
#     '양파': {
#         'target': lambda df: (df['품종명'] == '양파') & (df['거래단위'] == '1키로') & (df['등급'] == '상'),
#         '공판장': {'공판장명': ['*전국농협공판장'], '품목명': ['양파'], '품종명': ['기타양파'], '등급명': ['상']},
#         '도매': {'시장명': ['*전국도매시장'], '품목명': ['양파'], '품종명': ['양파(일반)']}
#     },
#     '배': {
#         'target': lambda df: (df['품종명'] == '신고') & (df['거래단위'] == '10 개') & (df['등급'] == '상품'),
#         '공판장': {'공판장명': ['*전국농협공판장'], '품목명': ['배'], '품종명': ['신고'], '등급명': ['상']},
#         '도매': {'시장명': ['*전국도매시장'], '품목명': ['배'], '품종명': ['신고']}
#     }
#     }

#   # 타겟 데이터 필터링
#     raw_품목 = raw_data[raw_data['품목명'] == 품목명]
#     target_mask = conditions[품목명]['target'](raw_품목)
#     filtered_data = raw_품목[target_mask]

#     # 1. '평년 평균가격(원)'이 0이면 '평균가격(원)'으로 채움
#     filtered_data.loc[filtered_data['평년 평균가격(원)'] == 0, '평년 평균가격(원)'] = filtered_data['평균가격(원)']

#     # 마지막 관측치 저장
#     last_observed_value = filtered_data['평균가격(원)'].iloc[-1]  # 마지막 관측치 저장

#     # 1차 차분 적용 (평균가격 및 평년 평균가격)
#     filtered_data['평균가격(원)_차분'] = filtered_data['평균가격(원)'].diff().fillna(0)
#     filtered_data['평년 평균가격(원)_차분'] = filtered_data['평년 평균가격(원)'].diff().fillna(0)

#     if is_train:
#         # 학습 데이터의 시점 파싱 (연도, 월, 순서) 및 주기성 피처 생성
#         filtered_data['연도'] = pd.to_numeric(filtered_data['시점'].str[:4], errors='coerce')
#         month_map = {'01': 1, '02': 2, '03': 3, '04': 4, '05': 5, '06': 6,
#                      '07': 7, '08': 8, '09': 9, '10': 10, '11': 11, '12': 12}
#         순_map = {'상순': 0, '중순': 1, '하순': 2}
#         filtered_data['월'] = filtered_data['시점'].str[4:6].map(month_map)
#         filtered_data['순'] = filtered_data['시점'].str[6:].map(순_map)
#         filtered_data['순_연간'] = (filtered_data['월'] - 1) * 3 + filtered_data['순']

#         # 1년(360일)을 36순 주기로 변환한 주기성 피처 생성
#         주기 = 36
#         filtered_data['sine_주기'] = np.sin(2 * np.pi * filtered_data['순_연간'] / 주기)
#         filtered_data['cosine_주기'] = np.cos(2 * np.pi * filtered_data['순_연간'] / 주기)
#     else:
#         # 테스트 데이터: 주기성이 필요 없으므로 연도, 월, 순, 주기성 피처를 모두 0 또는 NaN으로 채움
#         filtered_data['연도'] = np.nan
#         filtered_data['월'] = np.nan
#         filtered_data['순'] = np.nan
#         filtered_data['순_연간'] = np.nan
#         filtered_data['sine_주기'] = 0
#         filtered_data['cosine_주기'] = 0

#     # 공판장 데이터 처리
#     if conditions[품목명]['공판장']:
#         filtered_공판장 = 산지공판장
#         for key, value in conditions[품목명]['공판장'].items():
#             filtered_공판장 = filtered_공판장[filtered_공판장[key].isin(value)]
#         filtered_공판장 = filtered_공판장[filtered_공판장['총반입량(kg)'] > 0]
#         filtered_공판장.loc[filtered_공판장['평년 평균가격(원) Common Year SOON'] == 0,
#                              '평년 평균가격(원) Common Year SOON'] = filtered_공판장['전년 평균가격(원) PreVious YeaR']
#         filtered_공판장['공판장_평년 평균가격(원)_차분'] = filtered_공판장['평년 평균가격(원) Common Year SOON'].diff().fillna(0)
#         filtered_공판장 = filtered_공판장.add_prefix('공판장_').rename(columns={'공판장_시점': '시점'})
#         filtered_data = filtered_data.merge(filtered_공판장, on='시점', how='left')

#     # 도매 데이터 처리
#     if conditions[품목명]['도매']:
#         filtered_도매 = 전국도매
#         for key, value in conditions[품목명]['도매'].items():
#             filtered_도매 = filtered_도매[filtered_도매[key].isin(value)]
#         filtered_도매 = filtered_도매[filtered_도매['총반입량(kg)'] > 0]
#         filtered_도매.loc[filtered_도매['평년 평균가격(원) Common Year SOON'] == 0,
#                           '평년 평균가격(원) Common Year SOON'] = filtered_도매['전년 평균가격(원) PreVious YeaR']
#         filtered_도매['도매_평년 평균가격(원)_차분'] = filtered_도매['평년 평균가격(원) Common Year SOON'].diff().fillna(0)
#         filtered_도매 = filtered_도매.add_prefix('도매_').rename(columns={'도매_시점': '시점'})
#         filtered_data = filtered_data.merge(filtered_도매, on='시점', how='left')

#     # 수치형 컬럼 처리
#     numeric_columns = filtered_data.select_dtypes(include=[np.number]).columns
#     filtered_data = filtered_data[['시점'] + list(numeric_columns)]
#     filtered_data[numeric_columns] = filtered_data[numeric_columns].fillna(0)

#     # 정규화 적용
#     if scaler is None:
#         scaler = MinMaxScaler()
#         filtered_data[numeric_columns] = scaler.fit_transform(filtered_data[numeric_columns])
#     else:
#         filtered_data[numeric_columns] = scaler.transform(filtered_data[numeric_columns])

#     return filtered_data, scaler ,last_observed_value

## Define Custom Dataset Class

## Define Model and Training Functions

## Train Models and Generate Predictions

In [None]:
# GRU forecaster: a stacked GRU encoder followed by a linear read-out.
class PricePredictionGRU(nn.Module):
    """Multi-layer GRU whose last time step is projected to the forecast.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of every GRU layer.
        num_layers: Number of stacked GRU layers.
        output_size: Number of values produced per input sequence.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Map a batch-first input ``(batch, seq_len, input_size)`` to ``(batch, output_size)``."""
        # Explicit all-zero initial hidden state, created on the input's device.
        initial_hidden = torch.zeros(
            self.num_layers, x.size(0), self.hidden_size, device=x.device
        )

        sequence_output, _ = self.gru(x, initial_hidden)

        # Only the final time step of the top layer feeds the linear head.
        last_step = sequence_output[:, -1, :]
        return self.fc(last_step)

# Sliding-window dataset over a preprocessed price frame.
class AgriculturePriceDataset(Dataset):
    """Windows of numeric features with the following target prices.

    Training mode (``is_test=False``): every sample is a pair ``(x, y)`` where
    ``x`` holds ``window_size`` consecutive rows of all numeric columns and
    ``y`` the next ``prediction_length`` values of '평균가격(원)'.

    Test mode (``is_test=True``): the whole frame is one single input sequence
    and no target is produced.
    """

    def __init__(self, dataframe, window_size=9, prediction_length=3, is_test=False):
        self.data = dataframe
        self.window_size = window_size
        self.prediction_length = prediction_length
        self.is_test = is_test

        self.price_column = '평균가격(원)'
        self.numeric_columns = self.data.select_dtypes(include=[np.number]).columns.tolist()

        feature_matrix = self.data[self.numeric_columns].values
        self.sequences = []
        if self.is_test:
            self.sequences = [feature_matrix]
        else:
            span = self.window_size + self.prediction_length
            window_count = len(self.data) - span + 1
            if window_count > 0:
                target_values = self.data[self.price_column].values
                self.sequences = [
                    (
                        feature_matrix[start:start + self.window_size],
                        target_values[start + self.window_size:start + span],
                    )
                    for start in range(window_count)
                ]

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        sample = self.sequences[idx]
        if self.is_test:
            return torch.FloatTensor(sample)
        features, targets = sample
        return torch.FloatTensor(features), torch.FloatTensor(targets)

# Training function: one full-precision optimisation pass over the loader.
def train_model(model, train_loader, criterion, optimizer, num_epochs, device):
    """Train ``model`` for a single pass over ``train_loader``.

    Args:
        model: Module to optimise; switched to training mode.
        train_loader: Sized iterable of ``(inputs, targets)`` batches.
        criterion: Loss callable ``criterion(outputs, targets)``.
        optimizer: Optimizer bound to the model parameters.
        num_epochs: Unused; the caller runs the epoch loop.
        device: Device every batch is moved to.

    Returns:
        Sum of the per-batch losses divided by the number of batches.
    """
    model.train()
    running_loss = 0
    for inputs, targets in train_loader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        batch_loss = criterion(model(inputs), targets)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()
    return running_loss / len(train_loader)

# Evaluation function: average loss over a loader without gradient tracking.
def evaluate_model(model, test_loader, criterion, device):
    """Return the mean per-batch loss of ``model`` on ``test_loader``.

    The model is switched to evaluation mode and no gradients are recorded.

    Args:
        model: Module to evaluate.
        test_loader: Sized iterable of ``(inputs, targets)`` batches.
        criterion: Loss callable ``criterion(outputs, targets)``.
        device: Device every batch is moved to.
    """
    model.eval()
    running_loss = 0
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)

            batch_loss = criterion(model(inputs), targets)
            running_loss += batch_loss.item()
    return running_loss / len(test_loader)

# Main training loop
def _restore_prices(scaled_diffs, scaler, price_column_index, last_observed_value):
    """Turn scaled first differences back into price levels.

    Args:
        scaled_diffs: Array ``(n_sequences, horizon)`` of model outputs, i.e.
            min-max scaled consecutive price differences.
        scaler: Fitted min-max scaler exposing per-column ``min_`` / ``scale_``.
        price_column_index: Position of the price column inside the scaler.
        last_observed_value: Last real price preceding the forecast horizon.

    Returns:
        Array of the same shape holding the restored prices.
    """
    # Inverse of the min-max transform ``scaled = raw * scale_ + min_``.
    diffs = (scaled_diffs - scaler.min_[price_column_index]) / scaler.scale_[price_column_index]
    # Each output is the change relative to the previous step, so step k is
    # the last observation plus the sum of the first k differences.
    return last_observed_value + np.cumsum(diffs, axis=-1)


def train_and_predict(품목_리스트):
    """Train one GRU per item and forecast every test file.

    For each item the training data is preprocessed, windowed and split 80/20
    into training and validation windows. The model is trained for
    ``CFG.epoch`` epochs; the weights with the lowest validation loss are
    saved to ``models/best_model_<item>.pth`` and restored before inference.
    Per-epoch losses are printed and written to
    ``logs/<item>_training_log.txt``.

    Args:
        품목_리스트: Iterable of item names to process.

    Returns:
        Dict mapping each item name to a flat list of restored price
        predictions, concatenated over ``TEST_00`` .. ``TEST_24``.
        NOTE(review): a test file whose prediction contains NaN is skipped,
        which shortens the list for that item.
    """
    품목별_predictions = {}
    품목별_scalers = {}
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    os.makedirs('models', exist_ok=True)
    os.makedirs('logs', exist_ok=True)

    pbar_outer = tqdm(품목_리스트, desc="품목 처리 중", position=0)
    for 품목명 in pbar_outer:
        pbar_outer.set_description(f"품목별 전처리 및 모델 학습 -> {품목명}")

        # Data preprocessing
        train_frame, scaler, _ = process_data("./train/train.csv",
                                              "./train/meta/TRAIN_산지공판장_2018-2021.csv",
                                              "./train/meta/TRAIN_전국도매_2018-2021.csv",
                                              품목명)
        품목별_scalers[품목명] = scaler

        # Dataset and DataLoader setup
        dataset = AgriculturePriceDataset(train_frame)
        train_split, val_split = train_test_split(dataset, test_size=0.2, random_state=42)
        train_loader = DataLoader(train_split, CFG.batch_size, shuffle=True, num_workers=4)
        val_loader = DataLoader(val_split, CFG.batch_size, shuffle=False, num_workers=4)

        # Model setup
        input_size = len(dataset.numeric_columns)
        price_column_index = dataset.numeric_columns.index('평균가격(원)')
        model = PricePredictionGRU(input_size, CFG.hidden_size, CFG.num_layers, CFG.output_size)
        model = model.to(device)

        # Training setup
        criterion = nn.L1Loss()
        optimizer = torch.optim.AdamW(model.parameters(), lr=CFG.learning_rate, weight_decay=CFG.weight_decay)

        # Training loop
        best_val_loss = float('inf')
        checkpoint_saved = False
        model_save_path = os.path.join('models', f'best_model_{품목명}.pth')
        log_file_path = os.path.join('logs', f'{품목명}_training_log.txt')
        with open(log_file_path, 'w', encoding='utf-8') as log_file:
            for epoch in range(CFG.epoch):
                train_loss = train_model(model, train_loader, criterion, optimizer, CFG.epoch, device)
                val_loss = evaluate_model(model, val_loader, criterion, device)

                # Save the weights whenever the validation loss improves.
                if val_loss < best_val_loss:
                    best_val_loss = val_loss
                    print(f"Saving model to: {model_save_path}")
                    torch.save(model.state_dict(), model_save_path)
                    checkpoint_saved = True

                # Report the epoch losses on the console and in the log file.
                log_text = f'Epoch {epoch+1}/{CFG.epoch}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}\n'
                print(log_text)
                log_file.write(log_text)

        # Predict with the best validation weights, not the last-epoch ones.
        # Only a checkpoint written during this run is trusted.
        if checkpoint_saved:
            model.load_state_dict(torch.load(model_save_path, map_location=device))

        # Inference
        품목_predictions = []
        pbar_inner = tqdm(range(25), desc="테스트 파일 추론 중", position=1, leave=False)

        for i in pbar_inner:
            test_file = f"./test/TEST_{i:02d}.csv"
            산지공판장_file = f"./test/meta/TEST_산지공판장_{i:02d}.csv"
            전국도매_file = f"./test/meta/TEST_전국도매_{i:02d}.csv"

            test_data, _, last_observed_value = process_data(test_file, 산지공판장_file, 전국도매_file, 품목명, scaler=품목별_scalers[품목명])
            test_dataset = AgriculturePriceDataset(test_data, is_test=True)
            test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

            model.eval()
            predictions = []
            with torch.no_grad():
                for batch in test_loader:
                    batch = batch.to(device)
                    output = model(batch)
                    predictions.append(output.cpu().numpy())

            predictions_array = np.concatenate(predictions)
            predictions_original_scale = _restore_prices(
                predictions_array, 품목별_scalers[품목명], price_column_index, last_observed_value
            )

            if np.isnan(predictions_original_scale).any():
                pbar_inner.set_postfix({"상태": "NaN"})
            else:
                pbar_inner.set_postfix({"상태": "정상"})
                품목_predictions.extend(predictions_original_scale.flatten())

        품목별_predictions[품목명] = 품목_predictions
        # No manual pbar_outer.update(): iterating the bar already advances it.

    return 품목별_predictions

# Run training and inference for every item.
품목별_predictions = train_and_predict(품목_리스트)

# Fill the submission template with one column per item.
sample_submission = pd.read_csv('./sample_submission.csv')
for item_name, item_predictions in 품목별_predictions.items():
    sample_submission[item_name] = item_predictions

# Save the result without the index column.
sample_submission.to_csv('./제발_제출3.csv', index=False)

품목 처리 중:   0%|          | 0/10 [00:00<?, ?it/s]

Saving model to: models/best_model_건고추.pth
Epoch 1/50, Train Loss: 0.2449, Val Loss: 0.4499

Saving model to: models/best_model_건고추.pth
Epoch 2/50, Train Loss: 0.3124, Val Loss: 0.1259

Epoch 3/50, Train Loss: 0.1633, Val Loss: 0.1860

Saving model to: models/best_model_건고추.pth
Epoch 4/50, Train Loss: 0.1765, Val Loss: 0.1044

Saving model to: models/best_model_건고추.pth
Epoch 5/50, Train Loss: 0.0927, Val Loss: 0.0599

Saving model to: models/best_model_건고추.pth
Epoch 6/50, Train Loss: 0.0725, Val Loss: 0.0576

Saving model to: models/best_model_건고추.pth
Epoch 7/50, Train Loss: 0.0606, Val Loss: 0.0572

Epoch 8/50, Train Loss: 0.0718, Val Loss: 0.0714

Saving model to: models/best_model_건고추.pth
Epoch 9/50, Train Loss: 0.0779, Val Loss: 0.0500

Epoch 10/50, Train Loss: 0.0577, Val Loss: 0.0521

Epoch 11/50, Train Loss: 0.0682, Val Loss: 0.0581

Epoch 12/50, Train Loss: 0.0738, Val Loss: 0.0559

Saving model to: models/best_model_건고추.pth
Epoch 13/50, Train Loss: 0.0651, Val Loss: 0.0391

Ep

테스트 파일 추론 중:   0%|          | 0/25 [00:00<?, ?it/s]

Saving model to: models/best_model_사과.pth
Epoch 1/50, Train Loss: 0.3882, Val Loss: 0.2805

Saving model to: models/best_model_사과.pth
Epoch 2/50, Train Loss: 0.1917, Val Loss: 0.1798

Saving model to: models/best_model_사과.pth
Epoch 3/50, Train Loss: 0.1533, Val Loss: 0.1467

Saving model to: models/best_model_사과.pth
Epoch 4/50, Train Loss: 0.1172, Val Loss: 0.0882

Epoch 5/50, Train Loss: 0.0999, Val Loss: 0.1114

Epoch 6/50, Train Loss: 0.0983, Val Loss: 0.0882

Epoch 7/50, Train Loss: 0.0785, Val Loss: 0.1018

Epoch 8/50, Train Loss: 0.0921, Val Loss: 0.0905

Epoch 9/50, Train Loss: 0.0749, Val Loss: 0.0947

Saving model to: models/best_model_사과.pth
Epoch 10/50, Train Loss: 0.0813, Val Loss: 0.0859

Saving model to: models/best_model_사과.pth
Epoch 11/50, Train Loss: 0.0800, Val Loss: 0.0806

Epoch 12/50, Train Loss: 0.0759, Val Loss: 0.0983

Epoch 13/50, Train Loss: 0.0815, Val Loss: 0.0934

Epoch 14/50, Train Loss: 0.0794, Val Loss: 0.0850

Epoch 15/50, Train Loss: 0.0779, Val Loss: 

테스트 파일 추론 중:   0%|          | 0/25 [00:00<?, ?it/s]

Saving model to: models/best_model_감자.pth
Epoch 1/50, Train Loss: 0.3576, Val Loss: 0.3071

Saving model to: models/best_model_감자.pth
Epoch 2/50, Train Loss: 0.1956, Val Loss: 0.1641

Saving model to: models/best_model_감자.pth
Epoch 3/50, Train Loss: 0.1606, Val Loss: 0.1217

Saving model to: models/best_model_감자.pth
Epoch 4/50, Train Loss: 0.1088, Val Loss: 0.1050

Saving model to: models/best_model_감자.pth
Epoch 5/50, Train Loss: 0.1096, Val Loss: 0.0956

Epoch 6/50, Train Loss: 0.0876, Val Loss: 0.1070

Epoch 7/50, Train Loss: 0.0976, Val Loss: 0.1004

Saving model to: models/best_model_감자.pth
Epoch 8/50, Train Loss: 0.0839, Val Loss: 0.0852

Epoch 9/50, Train Loss: 0.0786, Val Loss: 0.0903

Saving model to: models/best_model_감자.pth
Epoch 10/50, Train Loss: 0.0876, Val Loss: 0.0847

Epoch 11/50, Train Loss: 0.0785, Val Loss: 0.0902

Epoch 12/50, Train Loss: 0.0777, Val Loss: 0.0955

Epoch 13/50, Train Loss: 0.0854, Val Loss: 0.0855

Epoch 14/50, Train Loss: 0.0743, Val Loss: 0.0887

S

테스트 파일 추론 중:   0%|          | 0/25 [00:00<?, ?it/s]

Saving model to: models/best_model_배.pth
Epoch 1/50, Train Loss: 0.5558, Val Loss: 0.0933

Epoch 2/50, Train Loss: 0.1114, Val Loss: 0.1195

Epoch 3/50, Train Loss: 0.0921, Val Loss: 0.1233

Epoch 4/50, Train Loss: 0.1079, Val Loss: 0.0985

Saving model to: models/best_model_배.pth
Epoch 5/50, Train Loss: 0.0802, Val Loss: 0.0858

Epoch 6/50, Train Loss: 0.0810, Val Loss: 0.1029

Saving model to: models/best_model_배.pth
Epoch 7/50, Train Loss: 0.0884, Val Loss: 0.0835

Saving model to: models/best_model_배.pth
Epoch 8/50, Train Loss: 0.0737, Val Loss: 0.0631

Epoch 9/50, Train Loss: 0.0658, Val Loss: 0.0786

Epoch 10/50, Train Loss: 0.0659, Val Loss: 0.0686

Epoch 11/50, Train Loss: 0.0575, Val Loss: 0.0668

Epoch 12/50, Train Loss: 0.0619, Val Loss: 0.0676

Saving model to: models/best_model_배.pth
Epoch 13/50, Train Loss: 0.0551, Val Loss: 0.0514

Epoch 14/50, Train Loss: 0.0452, Val Loss: 0.0562

Saving model to: models/best_model_배.pth
Epoch 15/50, Train Loss: 0.0524, Val Loss: 0.0484

테스트 파일 추론 중:   0%|          | 0/25 [00:00<?, ?it/s]

Saving model to: models/best_model_깐마늘(국산).pth
Epoch 1/50, Train Loss: 0.2569, Val Loss: 0.3930

Saving model to: models/best_model_깐마늘(국산).pth
Epoch 2/50, Train Loss: 0.2576, Val Loss: 0.1629

Epoch 3/50, Train Loss: 0.1757, Val Loss: 0.2032

Saving model to: models/best_model_깐마늘(국산).pth
Epoch 4/50, Train Loss: 0.1738, Val Loss: 0.1261

Epoch 5/50, Train Loss: 0.0909, Val Loss: 0.1435

Saving model to: models/best_model_깐마늘(국산).pth
Epoch 6/50, Train Loss: 0.1196, Val Loss: 0.1222

Saving model to: models/best_model_깐마늘(국산).pth
Epoch 7/50, Train Loss: 0.0909, Val Loss: 0.1045

Epoch 8/50, Train Loss: 0.0886, Val Loss: 0.1257

Saving model to: models/best_model_깐마늘(국산).pth
Epoch 9/50, Train Loss: 0.0948, Val Loss: 0.0962

Saving model to: models/best_model_깐마늘(국산).pth
Epoch 10/50, Train Loss: 0.0633, Val Loss: 0.0836

Epoch 11/50, Train Loss: 0.0626, Val Loss: 0.0906

Saving model to: models/best_model_깐마늘(국산).pth
Epoch 12/50, Train Loss: 0.0643, Val Loss: 0.0819

Epoch 13/50, Train Lo

테스트 파일 추론 중:   0%|          | 0/25 [00:00<?, ?it/s]

Saving model to: models/best_model_무.pth
Epoch 1/50, Train Loss: 0.3517, Val Loss: 0.2742

Saving model to: models/best_model_무.pth
Epoch 2/50, Train Loss: 0.1737, Val Loss: 0.1632

Saving model to: models/best_model_무.pth
Epoch 3/50, Train Loss: 0.1737, Val Loss: 0.1500

Saving model to: models/best_model_무.pth
Epoch 4/50, Train Loss: 0.1323, Val Loss: 0.1103

Epoch 5/50, Train Loss: 0.1046, Val Loss: 0.1287

Saving model to: models/best_model_무.pth
Epoch 6/50, Train Loss: 0.1059, Val Loss: 0.1008

Epoch 7/50, Train Loss: 0.0965, Val Loss: 0.1176

Epoch 8/50, Train Loss: 0.1055, Val Loss: 0.1031

Epoch 9/50, Train Loss: 0.0938, Val Loss: 0.1045

Epoch 10/50, Train Loss: 0.0954, Val Loss: 0.1120

Epoch 11/50, Train Loss: 0.0971, Val Loss: 0.1050

Saving model to: models/best_model_무.pth
Epoch 12/50, Train Loss: 0.0899, Val Loss: 0.1007

Saving model to: models/best_model_무.pth
Epoch 13/50, Train Loss: 0.0905, Val Loss: 0.1001

Epoch 14/50, Train Loss: 0.0940, Val Loss: 0.1003

Epoch 15

테스트 파일 추론 중:   0%|          | 0/25 [00:00<?, ?it/s]

Saving model to: models/best_model_상추.pth
Epoch 1/50, Train Loss: 0.4023, Val Loss: 0.2373

Saving model to: models/best_model_상추.pth
Epoch 2/50, Train Loss: 0.1669, Val Loss: 0.1384

Saving model to: models/best_model_상추.pth
Epoch 3/50, Train Loss: 0.1347, Val Loss: 0.1144

Saving model to: models/best_model_상추.pth
Epoch 4/50, Train Loss: 0.0956, Val Loss: 0.1089

Saving model to: models/best_model_상추.pth
Epoch 5/50, Train Loss: 0.1075, Val Loss: 0.0848

Epoch 6/50, Train Loss: 0.0835, Val Loss: 0.1074

Epoch 7/50, Train Loss: 0.0978, Val Loss: 0.0851

Epoch 8/50, Train Loss: 0.0818, Val Loss: 0.0882

Epoch 9/50, Train Loss: 0.0849, Val Loss: 0.0893

Saving model to: models/best_model_상추.pth
Epoch 10/50, Train Loss: 0.0837, Val Loss: 0.0824

Epoch 11/50, Train Loss: 0.0779, Val Loss: 0.0864

Epoch 12/50, Train Loss: 0.0802, Val Loss: 0.0845

Saving model to: models/best_model_상추.pth
Epoch 13/50, Train Loss: 0.0779, Val Loss: 0.0811

Epoch 14/50, Train Loss: 0.0777, Val Loss: 0.0847

E

테스트 파일 추론 중:   0%|          | 0/25 [00:00<?, ?it/s]

Saving model to: models/best_model_배추.pth
Epoch 1/50, Train Loss: 0.4416, Val Loss: 0.1927

Saving model to: models/best_model_배추.pth
Epoch 2/50, Train Loss: 0.1564, Val Loss: 0.1215

Saving model to: models/best_model_배추.pth
Epoch 3/50, Train Loss: 0.1083, Val Loss: 0.0900

Epoch 4/50, Train Loss: 0.0981, Val Loss: 0.1089

Epoch 5/50, Train Loss: 0.0938, Val Loss: 0.1012

Epoch 6/50, Train Loss: 0.1020, Val Loss: 0.0959

Saving model to: models/best_model_배추.pth
Epoch 7/50, Train Loss: 0.1011, Val Loss: 0.0881

Epoch 8/50, Train Loss: 0.0957, Val Loss: 0.0906

Epoch 9/50, Train Loss: 0.0985, Val Loss: 0.0913

Saving model to: models/best_model_배추.pth
Epoch 10/50, Train Loss: 0.0894, Val Loss: 0.0830

Saving model to: models/best_model_배추.pth
Epoch 11/50, Train Loss: 0.0883, Val Loss: 0.0825

Epoch 12/50, Train Loss: 0.0866, Val Loss: 0.0915

Saving model to: models/best_model_배추.pth
Epoch 13/50, Train Loss: 0.0862, Val Loss: 0.0809

Epoch 14/50, Train Loss: 0.0903, Val Loss: 0.0838

E

테스트 파일 추론 중:   0%|          | 0/25 [00:00<?, ?it/s]

Saving model to: models/best_model_양파.pth
Epoch 1/50, Train Loss: 0.3516, Val Loss: 0.2846

Saving model to: models/best_model_양파.pth
Epoch 2/50, Train Loss: 0.1835, Val Loss: 0.1710

Saving model to: models/best_model_양파.pth
Epoch 3/50, Train Loss: 0.1744, Val Loss: 0.1180

Epoch 4/50, Train Loss: 0.1079, Val Loss: 0.1291

Saving model to: models/best_model_양파.pth
Epoch 5/50, Train Loss: 0.1131, Val Loss: 0.1006

Epoch 6/50, Train Loss: 0.1015, Val Loss: 0.1228

Saving model to: models/best_model_양파.pth
Epoch 7/50, Train Loss: 0.1096, Val Loss: 0.0991

Epoch 8/50, Train Loss: 0.0942, Val Loss: 0.1084

Epoch 9/50, Train Loss: 0.0984, Val Loss: 0.1008

Epoch 10/50, Train Loss: 0.0912, Val Loss: 0.1035

Saving model to: models/best_model_양파.pth
Epoch 11/50, Train Loss: 0.0922, Val Loss: 0.0971

Saving model to: models/best_model_양파.pth
Epoch 12/50, Train Loss: 0.0914, Val Loss: 0.0954

Epoch 13/50, Train Loss: 0.0897, Val Loss: 0.1026

Epoch 14/50, Train Loss: 0.0897, Val Loss: 0.0961

S

테스트 파일 추론 중:   0%|          | 0/25 [00:00<?, ?it/s]

Saving model to: models/best_model_대파.pth
Epoch 1/50, Train Loss: 0.4255, Val Loss: 0.2077

Saving model to: models/best_model_대파.pth
Epoch 2/50, Train Loss: 0.1724, Val Loss: 0.1503

Saving model to: models/best_model_대파.pth
Epoch 3/50, Train Loss: 0.1248, Val Loss: 0.1012

Saving model to: models/best_model_대파.pth
Epoch 4/50, Train Loss: 0.1121, Val Loss: 0.0886

Epoch 5/50, Train Loss: 0.0989, Val Loss: 0.1063

Saving model to: models/best_model_대파.pth
Epoch 6/50, Train Loss: 0.1028, Val Loss: 0.0865

Epoch 7/50, Train Loss: 0.0917, Val Loss: 0.0941

Epoch 8/50, Train Loss: 0.0978, Val Loss: 0.0957

Saving model to: models/best_model_대파.pth
Epoch 9/50, Train Loss: 0.0977, Val Loss: 0.0808

Epoch 10/50, Train Loss: 0.0924, Val Loss: 0.0949

Epoch 11/50, Train Loss: 0.0925, Val Loss: 0.0819

Epoch 12/50, Train Loss: 0.0892, Val Loss: 0.0815

Epoch 13/50, Train Loss: 0.0917, Val Loss: 0.0859

Epoch 14/50, Train Loss: 0.0893, Val Loss: 0.0880

Epoch 15/50, Train Loss: 0.0894, Val Loss: 

테스트 파일 추론 중:   0%|          | 0/25 [00:00<?, ?it/s]