## Import

In [32]:
import pandas as pd
import numpy as np
from tqdm.auto import tqdm
from sklearn.preprocessing import LabelEncoder

import torch
import math
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

In [33]:
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
device = torch.device('cuda:1') if torch.cuda.is_available() else torch.device('cpu')

## Hyperparameter Setting

In [34]:
CFG = {
    'TRAIN_WINDOW_SIZE':90, # 90일치로 학습 pred_len
    'PREDICT_SIZE':21, # 21일치 예측 label_len
    'freq':'d',# seasonal, trend 정보를 어떤걸 기준으로 할지
    'enc_in':1,#encoder input size
    'dec_in':1,#decoder input size
    'embed':'fixed',#time features encoding, options:[timeF, fixed, learned]
    'drop_out':0.1,
    'EPOCHS':10,
    'LEARNING_RATE':1e-4,
    'BATCH_SIZE':512,
    'd_model':512,
    'd_ff':2048,
    'e_layers':2,
    'd_layers':1,
    'n_heads': 8,
    'factor': 1,
    'c_out': 1,
    'SEED':41,
    'output_attention':'store_true'
}

In [35]:
def min_max_scaler(train_data):
    scale_max_dict = {}
    scale_min_dict = {}

    numeric_cols = train_data.columns[2:]

    # 각 행의 최댓값과 최솟값 계산
    min_values = train_data[numeric_cols].min(axis=1)
    max_values = train_data[numeric_cols].max(axis=1)
    print(max(max_values))

    # 각 행의 범위(max - min) 계산하고, 범위가 0인 경우 1로 대체
    ranges = max_values - min_values
    ranges[ranges == 0] = 1

    # min-max scaling 수행
    train_data[numeric_cols] = (train_data[numeric_cols].subtract(min_values, axis=0)).div(ranges, axis=0)

    # max와 min 값을 dictionary 형태로 저장
    scale_min_dict = min_values.to_dict()
    scale_max_dict = max_values.to_dict()

    # Label Encoding
    label_encoder = LabelEncoder()
    categorical_columns = ['대분류', '중분류']#, '대분류',]

    for col in categorical_columns:
        label_encoder.fit(train_data[col])
        train_data[col] = label_encoder.transform(train_data[col])

    return train_data, scale_max_dict, scale_min_dict

### 데이터 불러오기

In [36]:
import pandas as pd
import warnings

# 경고 메시지 무시
warnings.filterwarnings('ignore', category=pd.errors.PerformanceWarning)


# 데이터를 읽어옵니다.
train_df = pd.read_csv('./data/train.csv').drop(columns=['ID', '제품', '소분류', '브랜드']).head(10)
train_df, scale_max_dict, scale_min_dict = min_max_scaler(train_df)

date_columns = [col for col in train_df.columns if '-' in col]
date_info = pd.DataFrame(date_columns, columns=["full_date"])

# 연, 월, 일로 분리
date_info["year"] = date_info["full_date"].apply(lambda x: int(x.split("-")[0]))
date_info["month"] = date_info["full_date"].apply(lambda x: int(x.split("-")[1]))
date_info["day"] = date_info["full_date"].apply(lambda x: int(x.split("-")[2]))

new_columns = []

# 연, 월, 일 정보를 저장할 새로운 DataFrame을 생성합니다.
for idx, col in enumerate(date_columns):
    year_col = col + "_year"
    month_col = col + "_month"
    day_col = col + "_day"
    
    train_df[year_col] = [date_info.iloc[idx]["year"]] * len(train_df)
    train_df[month_col] = [date_info.iloc[idx]["month"]] * len(train_df)
    train_df[day_col] = [date_info.iloc[idx]["day"]] * len(train_df)
    
    new_columns.extend([year_col, month_col, day_col])

# 경고를 방지하기 위해 DataFrame을 복사합니다.
train_df = train_df.copy()


130


### 데이터 전처리

In [37]:
from joblib import Parallel, delayed

def process_row(i, data, sales_cols, train_size, predict_size, window_size, encode_info):
    num_rows = len(data)
    input_data_row = np.empty((len(sales_cols) - window_size + 1, train_size, 6))
    target_data_row = np.empty((len(sales_cols) - window_size + 1, predict_size, 4))
    
    for j in range(len(sales_cols) - window_size + 1):
        sales_window = data[sales_cols[j: j + train_size]].iloc[i].values
        year_window = (data[[col + "_year" for col in sales_cols[j: j + train_size]]].iloc[i].values - 2022)
        month_window = data[[col + "_month" for col in sales_cols[j: j + train_size]]].iloc[i].values
        day_window = data[[col + "_day" for col in sales_cols[j: j + train_size]]].iloc[i].values
        
        combined_window = np.column_stack((np.tile(encode_info, (train_size, 1)), sales_window, year_window, month_window, day_window))
        input_data_row[j] = combined_window

        target_year = (data[[col + "_year" for col in sales_cols[j + train_size: j + window_size]]].iloc[i].values - 2022)
        target_month = data[[col + "_month" for col in sales_cols[j + train_size: j + window_size]]].iloc[i].values
        target_day = data[[col + "_day" for col in sales_cols[j + train_size: j + window_size]]].iloc[i].values
        target_sales = data[sales_cols[j + train_size: j + window_size]].iloc[i].values

        combined_target = np.column_stack((target_sales, target_year, target_month, target_day))
        target_data_row[j] = combined_target

    return input_data_row, target_data_row

def make_train_data_parallel(data, train_size, predict_size):
    num_rows = len(data)
    window_size = train_size + predict_size
    
    sales_cols = [col for col in data.columns if '-' in col and not any(substr in col for substr in ['_year', '_month', '_day'])]
    
    input_data = np.empty((num_rows * (len(sales_cols) - window_size + 1), train_size, 6))
    target_data = np.empty((num_rows * (len(sales_cols) - window_size + 1), predict_size, 4))
    
    results = Parallel(n_jobs=-1)(delayed(process_row)(i, data, sales_cols, train_size, predict_size, window_size, data.iloc[i, :2].values) for i in range(num_rows))
    
    for i, (input_data_row, target_data_row) in enumerate(results):
        input_data[i * (len(sales_cols) - window_size + 1): (i + 1) * (len(sales_cols) - window_size + 1)] = input_data_row
        target_data[i * (len(sales_cols) - window_size + 1): (i + 1) * (len(sales_cols) - window_size + 1)] = target_data_row

    return input_data, target_data


In [38]:
train_input, train_target = make_train_data_parallel(train_df, CFG['TRAIN_WINDOW_SIZE'], CFG['PREDICT_SIZE'])

In [39]:
# Train / Validation Split
data_len = len(train_input)
val_input = train_input[-int(data_len*0.2):]
val_target = train_target[-int(data_len*0.2):]
train_input = train_input[:-int(data_len*0.2)]
train_target = train_target[:-int(data_len*0.2)]

### Custom Dataset

In [40]:
class CustomDataset(Dataset):
    def __init__(self, X, Y):
        self.X = torch.Tensor(X)
        self.Y = torch.Tensor(Y)
        self.sales_feature_size = 3  # 대분류, 중분류, 판매량
        self.time_feature_size = 3  # 연, 월, 일
            
    def __getitem__(self, index):
    # 판매량 추출
        sales_data = self.X[index, :, 2:3]  # 판매량은 3번째 칼럼에 위치한다고 가정
        # 대분류, 중분류 정보 추출
        product_data = self.X[index, :, :2]  # 대분류와 중분류는 처음 두 칼럼에 위치한다고 가정
        
        # 연, 월, 일 정보 추출
        input_time_features = self.X[index, :, self.sales_feature_size:]
        
        # target_data에서 판매량 및 연, 월, 일 정보 추출
        target_sales = self.Y[index, :, 0]
        target_time_features = self.Y[index, :, 1:]
        
        return product_data.long(), sales_data, target_sales, input_time_features, target_time_features


        
    def __len__(self):
        return len(self.X)


In [41]:
train_dataset = CustomDataset(train_input, train_target)
train_loader = DataLoader(train_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=True)

val_dataset = CustomDataset(val_input, val_target)
val_loader = DataLoader(val_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False)


### 모델 학습

## Autoformer Embedding layer

In [42]:
def compared_version(ver1, ver2):
    """
    :param ver1
    :param ver2
    :return: ver1< = >ver2 False/True
    """
    list1 = str(ver1).split(".")
    list2 = str(ver2).split(".")
    
    for i in range(len(list1)) if len(list1) < len(list2) else range(len(list2)):
        if int(list1[i]) == int(list2[i]):
            pass
        elif int(list1[i]) < int(list2[i]):
            return -1
        else:
            return 1
    
    if len(list1) == len(list2):
        return True
    elif len(list1) < len(list2):
        return False
    else:
        return True

In [43]:
class TimeFeatureEmbedding(nn.Module):
    def __init__(self, d_model, embed_type='timeF', freq='d'):
        super(TimeFeatureEmbedding, self).__init__()

        freq_map = {'h': 4, 't': 5, 's': 6, 'm': 1, 'a': 1, 'w': 2, 'd': 3, 'b': 3}
        d_inp = freq_map[freq]
        self.embed = nn.Linear(d_inp, d_model, bias=False)

    def forward(self, x):
        return self.embed(x)

In [44]:
class FixedEmbedding(nn.Module):
    def __init__(self, c_in, d_model):
        super(FixedEmbedding, self).__init__()

        w = torch.zeros(c_in, d_model).float()
        w.require_grad = False

        position = torch.arange(0, c_in).float().unsqueeze(1)
        div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp()

        w[:, 0::2] = torch.sin(position * div_term)
        w[:, 1::2] = torch.cos(position * div_term)

        self.emb = nn.Embedding(c_in, d_model)
        self.emb.weight = nn.Parameter(w, requires_grad=False)

    def forward(self, x):
        return self.emb(x).detach()

In [45]:
class DataEmbedding_wo_pos(nn.Module):
    def __init__(self, c_in, d_model, major_size,minor_size, embed_type='fixed', freq='d', dropout=0.1):#대분류,중분류 사이즈별 임베딩
        super(DataEmbedding_wo_pos, self).__init__()
        self.major_embedding = nn.Embedding(major_size, d_model)
        self.minor_embedding = nn.Embedding(minor_size, d_model)

        
        self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model)
        self.position_embedding = PositionalEmbedding(d_model=d_model)
        self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type, freq=freq)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, product_data, x, x_mark):
        x_embed = self.value_embedding(x)
        
        major_info = product_data[:, :, 0] # 대분류
        minor_info = product_data[:, :, 1] # 중분류
        
        major_embed = self.major_embedding(major_info)
        minor_embed = self.minor_embedding(minor_info)
        
        product_embed = major_embed + minor_embed
        
        x = x_embed + product_embed + self.temporal_embedding(x_mark)
        
        return self.dropout(x)



In [46]:
class TokenEmbedding(nn.Module):
    def __init__(self, c_in, d_model):
        super(TokenEmbedding, self).__init__()
        padding = 1 if compared_version(torch.__version__, '1.5.0') else 2
        self.tokenConv = nn.Conv1d(in_channels=c_in, out_channels=d_model,
                                   kernel_size=3, padding=padding, padding_mode='circular', bias=False)
        for m in self.modules():
            if isinstance(m, nn.Conv1d):
                nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='leaky_relu')

    def forward(self, x):
        x = self.tokenConv(x.permute(0, 2, 1)).transpose(1, 2)
        return x

In [47]:
class TemporalEmbedding(nn.Module):
    def __init__(self, d_model, embed_type='fixed', freq='d'):  # freq를 'd'로 변경
        super(TemporalEmbedding, self).__init__()

        year_size = 2  # 예시로 임의의 년도 크기 사용 (2022, 2023)
        month_size = 13
        day_size = 32

        Embed = FixedEmbedding if embed_type == 'fixed' else nn.Embedding
        if freq == 'd':
            self.year_embed = Embed(year_size, d_model)
            self.month_embed = Embed(month_size, d_model)
            self.day_embed = Embed(day_size, d_model)

    def forward(self, x):
        x = x.long()

        year_x = self.year_embed(x[:, :, 0])
        month_x = self.month_embed(x[:, :, 1])
        day_x = self.day_embed(x[:, :, 2])

        return year_x + month_x + day_x


In [48]:
class PositionalEmbedding(nn.Module):
    def __init__(self, d_model, max_len=5000):
        super(PositionalEmbedding, self).__init__()
        # Compute the positional encodings once in log space.
        pe = torch.zeros(max_len, d_model).float()
        pe.require_grad = False

        position = torch.arange(0, max_len).float().unsqueeze(1)
        div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp()

        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)

        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)

    def forward(self, x):
        return self.pe[:, :x.size(1)]

## AutoCorellation

In [49]:
class AutoCorrelation(nn.Module):
    """
    AutoCorrelation Mechanism with the following two phases:
    (1) period-based dependencies discovery
    (2) time delay aggregation
    This block can replace the self-attention family mechanism seamlessly.
    """
    def __init__(self, mask_flag=True, factor=1, scale=None, attention_dropout=0.1, output_attention=False):
        super(AutoCorrelation, self).__init__()
        self.factor = factor
        self.scale = scale
        self.mask_flag = mask_flag
        self.output_attention = output_attention
        self.dropout = nn.Dropout(attention_dropout)

    def time_delay_agg_training(self, values, corr):
        """
        SpeedUp version of Autocorrelation (a batch-normalization style design)
        This is for the training phase.
        """
        head = values.shape[1]
        channel = values.shape[2]
        length = values.shape[3]
        # find top k
        top_k = int(self.factor * math.log(length))
        mean_value = torch.mean(torch.mean(corr, dim=1), dim=1)
        index = torch.topk(torch.mean(mean_value, dim=0), top_k, dim=-1)[1]
        weights = torch.stack([mean_value[:, index[i]] for i in range(top_k)], dim=-1)
        # update corr
        tmp_corr = torch.softmax(weights, dim=-1)
        # aggregation
        tmp_values = values
        delays_agg = torch.zeros_like(values).float()
        for i in range(top_k):
            pattern = torch.roll(tmp_values, -int(index[i]), -1)
            delays_agg = delays_agg + pattern * \
                         (tmp_corr[:, i].unsqueeze(1).unsqueeze(1).unsqueeze(1).repeat(1, head, channel, length))
        return delays_agg

    def time_delay_agg_inference(self, values, corr):
        """
        SpeedUp version of Autocorrelation (a batch-normalization style design)
        This is for the inference phase.
        """
        batch = values.shape[0]
        head = values.shape[1]
        channel = values.shape[2]
        length = values.shape[3]
        # index init
        init_index = torch.arange(length).unsqueeze(0).unsqueeze(0).unsqueeze(0)\
            .repeat(batch, head, channel, 1).to(values.device)
        # find top k
        top_k = int(self.factor * math.log(length))
        mean_value = torch.mean(torch.mean(corr, dim=1), dim=1)
        weights, delay = torch.topk(mean_value, top_k, dim=-1)
        # update corr
        tmp_corr = torch.softmax(weights, dim=-1)
        # aggregation
        tmp_values = values.repeat(1, 1, 1, 2)
        delays_agg = torch.zeros_like(values).float()
        for i in range(top_k):
            tmp_delay = init_index + delay[:, i].unsqueeze(1).unsqueeze(1).unsqueeze(1).repeat(1, head, channel, length)
            pattern = torch.gather(tmp_values, dim=-1, index=tmp_delay)
            delays_agg = delays_agg + pattern * \
                         (tmp_corr[:, i].unsqueeze(1).unsqueeze(1).unsqueeze(1).repeat(1, head, channel, length))
        return delays_agg

    def time_delay_agg_full(self, values, corr):
        """
        Standard version of Autocorrelation
        """
        batch = values.shape[0]
        head = values.shape[1]
        channel = values.shape[2]
        length = values.shape[3]
        # index init
        init_index = torch.arange(length).unsqueeze(0).unsqueeze(0).unsqueeze(0)\
            .repeat(batch, head, channel, 1).to(values.device)
        # find top k
        top_k = int(self.factor * math.log(length))
        weights, delay = torch.topk(corr, top_k, dim=-1)
        # update corr
        tmp_corr = torch.softmax(weights, dim=-1)
        # aggregation
        tmp_values = values.repeat(1, 1, 1, 2)
        delays_agg = torch.zeros_like(values).float()
        for i in range(top_k):
            tmp_delay = init_index + delay[..., i].unsqueeze(-1)
            pattern = torch.gather(tmp_values, dim=-1, index=tmp_delay)
            delays_agg = delays_agg + pattern * (tmp_corr[..., i].unsqueeze(-1))
        return delays_agg

    def forward(self, queries, keys, values, attn_mask):
        B, L, H, E = queries.shape
        _, S, _, D = values.shape
        if L > S:
            zeros = torch.zeros_like(queries[:, :(L - S), :]).float()
            values = torch.cat([values, zeros], dim=1)
            keys = torch.cat([keys, zeros], dim=1)
        else:
            values = values[:, :L, :, :]
            keys = keys[:, :L, :, :]

        # period-based dependencies
        q_fft = torch.fft.rfft(queries.permute(0, 2, 3, 1).contiguous(), dim=-1)
        k_fft = torch.fft.rfft(keys.permute(0, 2, 3, 1).contiguous(), dim=-1)
        res = q_fft * torch.conj(k_fft)
        corr = torch.fft.irfft(res, n=L, dim=-1)

        # time delay agg
        if self.training:
            V = self.time_delay_agg_training(values.permute(0, 2, 3, 1).contiguous(), corr).permute(0, 3, 1, 2)
        else:
            V = self.time_delay_agg_inference(values.permute(0, 2, 3, 1).contiguous(), corr).permute(0, 3, 1, 2)

        if self.output_attention:
            return (V.contiguous(), corr.permute(0, 3, 1, 2))
        else:
            return (V.contiguous(), None)

In [50]:
class AutoCorrelationLayer(nn.Module):
    def __init__(self, correlation, d_model, n_heads, d_keys=None,
                 d_values=None):
        super(AutoCorrelationLayer, self).__init__()

        d_keys = d_keys or (d_model // n_heads)
        d_values = d_values or (d_model // n_heads)

        self.inner_correlation = correlation
        self.query_projection = nn.Linear(d_model, d_keys * n_heads)
        self.key_projection = nn.Linear(d_model, d_keys * n_heads)
        self.value_projection = nn.Linear(d_model, d_values * n_heads)
        self.out_projection = nn.Linear(d_values * n_heads, d_model)
        self.n_heads = n_heads

    def forward(self, queries, keys, values, attn_mask):
        B, L, _ = queries.shape
        _, S, _ = keys.shape
        H = self.n_heads

        queries = self.query_projection(queries).view(B, L, H, -1)
        keys = self.key_projection(keys).view(B, S, H, -1)
        values = self.value_projection(values).view(B, S, H, -1)

        out, attn = self.inner_correlation(
            queries,
            keys,
            values,
            attn_mask
        )
        out = out.view(B, L, -1)

        return self.out_projection(out), attn

## Autoformer Encoder

In [51]:
class my_Layernorm(nn.Module):
    """
    Special designed layernorm for the seasonal part
    """
    def __init__(self, channels):
        super(my_Layernorm, self).__init__()
        self.layernorm = nn.LayerNorm(channels)

    def forward(self, x):
        x_hat = self.layernorm(x)
        bias = torch.mean(x_hat, dim=1).unsqueeze(1).repeat(1, x.shape[1], 1)
        return x_hat - bias

In [52]:
class EncoderLayer(nn.Module):
    """
    Autoformer encoder layer with the progressive decomposition architecture
    """
    def __init__(self, attention, d_model, d_ff=None, moving_avg=25, dropout=0.1, activation="relu"):
        super(EncoderLayer, self).__init__()
        d_ff = d_ff or 4 * d_model
        self.attention = attention
        self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1, bias=False)
        self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1, bias=False)
        self.decomp1 = series_decomp(moving_avg)
        self.decomp2 = series_decomp(moving_avg)
        self.dropout = nn.Dropout(dropout)
        self.activation = F.relu if activation == "relu" else F.gelu

    def forward(self, x, attn_mask=None):
        new_x, attn = self.attention(
            x, x, x,
            attn_mask=attn_mask
        )
        x = x + self.dropout(new_x)
        x, _ = self.decomp1(x)
        y = x
        y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1))))
        y = self.dropout(self.conv2(y).transpose(-1, 1))
        res, _ = self.decomp2(x + y)
        return res, attn

In [53]:
class Encoder(nn.Module):
    """
    Autoformer encoder
    """
    def __init__(self, attn_layers, conv_layers=None, norm_layer=None):
        super(Encoder, self).__init__()
        self.attn_layers = nn.ModuleList(attn_layers)
        self.conv_layers = nn.ModuleList(conv_layers) if conv_layers is not None else None
        self.norm = norm_layer

    def forward(self, x, attn_mask=None):
        attns = []
        if self.conv_layers is not None:
            for attn_layer, conv_layer in zip(self.attn_layers, self.conv_layers):
                x, attn = attn_layer(x, attn_mask=attn_mask)
                x = conv_layer(x)
                attns.append(attn)
            x, attn = self.attn_layers[-1](x)
            attns.append(attn)
        else:
            for attn_layer in self.attn_layers:
                x, attn = attn_layer(x, attn_mask=attn_mask)
                attns.append(attn)

        if self.norm is not None:
            x = self.norm(x)

        return x, attns

In [54]:
class moving_avg(nn.Module):
    """
    Moving average block to highlight the trend of time series
    """
    def __init__(self, kernel_size, stride):
        super(moving_avg, self).__init__()
        self.kernel_size = kernel_size
        self.avg = nn.AvgPool1d(kernel_size=kernel_size, stride=stride, padding=0)

    def forward(self, x):
        # padding on the both ends of time series
        front = x[:, 0:1, :].repeat(1, (self.kernel_size - 1) // 2, 1)
        end = x[:, -1:, :].repeat(1, (self.kernel_size - 1) // 2, 1)
        x = torch.cat([front, x, end], dim=1)
        x = self.avg(x.permute(0, 2, 1))
        x = x.permute(0, 2, 1)
        return x

In [55]:
class series_decomp(nn.Module):
    """
    Series decomposition block
    """
    def __init__(self, kernel_size):
        super(series_decomp, self).__init__()
        self.moving_avg = moving_avg(kernel_size, stride=1)

    def forward(self, x):
        moving_mean = self.moving_avg(x)
        res = x - moving_mean
        return res, moving_mean

## Autoformer Decoder

In [56]:
class DecoderLayer(nn.Module):
    """
    Autoformer decoder layer with the progressive decomposition architecture
    """
    def __init__(self, self_attention, cross_attention, d_model, c_out, d_ff=None,
                 moving_avg=25, dropout=0.1, activation="relu"):
        super(DecoderLayer, self).__init__()
        d_ff = d_ff or 4 * d_model
        self.self_attention = self_attention
        self.cross_attention = cross_attention
        self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1, bias=False)
        self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1, bias=False)
        self.decomp1 = series_decomp(moving_avg)
        self.decomp2 = series_decomp(moving_avg)
        self.decomp3 = series_decomp(moving_avg)
        self.dropout = nn.Dropout(dropout)
        self.projection = nn.Conv1d(in_channels=d_model, out_channels=c_out, kernel_size=3, stride=1, padding=1,
                                    padding_mode='circular', bias=False)
        self.activation = F.relu if activation == "relu" else F.gelu

    def forward(self, x, cross, x_mask=None, cross_mask=None):
        x = x + self.dropout(self.self_attention(
            x, x, x,
            attn_mask=x_mask
        )[0])
        x, trend1 = self.decomp1(x)
        x = x + self.dropout(self.cross_attention(
            x, cross, cross,
            attn_mask=cross_mask
        )[0])
        x, trend2 = self.decomp2(x)
        y = x
        y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1))))
        y = self.dropout(self.conv2(y).transpose(-1, 1))
        x, trend3 = self.decomp3(x + y)

        residual_trend = trend1 + trend2 + trend3
        residual_trend = self.projection(residual_trend.permute(0, 2, 1)).transpose(1, 2)
        return x, residual_trend


In [57]:
class Decoder(nn.Module):
    """
    Autoformer encoder
    """
    def __init__(self, layers, norm_layer=None, projection=None):
        super(Decoder, self).__init__()
        self.layers = nn.ModuleList(layers)
        self.norm = norm_layer
        self.projection = projection

    def forward(self, x, cross, x_mask=None, cross_mask=None, trend=None):
        for layer in self.layers:
            x, residual_trend = layer(x, cross, x_mask=x_mask, cross_mask=cross_mask)
            trend = trend + residual_trend

        if self.norm is not None:
            x = self.norm(x)

        if self.projection is not None:
            x = self.projection(x)
        return x, trend

## Model

In [58]:
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.seq_len = CFG['TRAIN_WINDOW_SIZE'] + CFG['PREDICT_SIZE'] # 전체 사용할 데이터 사이즈
        self.label_len = CFG['TRAIN_WINDOW_SIZE']
        self.pred_len = CFG['PREDICT_SIZE']
        self.output_attention = CFG['output_attention']

        #Decomp
        kernel_size = 25 #이동평균 일단 5일로설정
        self.decomp = series_decomp(kernel_size)

        # Embedding
        # The series-wise connection inherently contains the sequential information.
        # Thus, we can discard the position embedding of transformers.
        self.enc_embedding = DataEmbedding_wo_pos(CFG['enc_in'], CFG['d_model'], 5, 11, CFG['embed'], CFG['freq'], CFG['drop_out'])



        self.dec_embedding = DataEmbedding_wo_pos(CFG['enc_in'], CFG['d_model'], 5, 11, CFG['embed'], CFG['freq'], CFG['drop_out'])


        # Encoder
        self.encoder = Encoder(
            [
                EncoderLayer(
                    AutoCorrelationLayer(
                        AutoCorrelation(False, CFG['factor'], attention_dropout=CFG['drop_out'],
                                        output_attention=CFG['output_attention']),
                        CFG['d_model'], CFG['n_heads']),
                    CFG['d_model'],
                    CFG['d_ff'],
                    moving_avg=25,
                    dropout=0.1,
                    activation='gelu'
                ) for l in range(CFG['e_layers'])
            ],
            norm_layer=my_Layernorm(CFG['d_model'])
        )
        
        # Decoder
        self.decoder = Decoder(
            [
                DecoderLayer(
                    AutoCorrelationLayer(
                        AutoCorrelation(True, CFG['factor'], attention_dropout=CFG['drop_out'],
                                        output_attention=False),
                        CFG['d_model'], CFG['n_heads']),
                    AutoCorrelationLayer(
                        AutoCorrelation(False, CFG['factor'], attention_dropout=CFG['drop_out'],
                                        output_attention=False),
                        CFG['d_model'], CFG['n_heads']),
                    CFG['d_model'],
                    CFG['c_out'],
                    CFG['d_ff'],
                    moving_avg=25,
                    dropout=0.1,
                    activation='gelu'
                )
                for l in range(CFG['d_layers'])
            ],
            norm_layer=my_Layernorm(CFG['d_model']),
            projection=nn.Linear(CFG['d_model'], CFG['c_out'], bias=True)
        )

    def forward(self, product_data, x_enc, x_mark_enc, x_dec, x_mark_dec,
            enc_self_mask=None, dec_self_mask=None, dec_enc_mask=None):

            # decomp init
            mean = torch.mean(x_enc, dim=1).unsqueeze(1).repeat(1, self.pred_len, 1)
            zeros = torch.zeros([x_dec.shape[0], self.pred_len, 1], device=x_enc.device)
            
            seasonal_init, trend_init = self.decomp(x_enc)  # 판매량만 사용해서 구함
            # decoder input
            trend_init = torch.cat([trend_init[:, -self.label_len:, :], mean], dim=1)
            seasonal_init = torch.cat([seasonal_init[:, -self.label_len:, :], zeros], dim=1)
            # enc
            enc_out = self.enc_embedding(product_data, x_enc, x_mark_enc)  # 제품 정보 추가
            enc_out, attns = self.encoder(enc_out, attn_mask=enc_self_mask)

            # dec
            dec_product_data = product_data[:, -1, :].unsqueeze(1).repeat(1, self.seq_len, 1)

            dec_out = self.dec_embedding(dec_product_data, seasonal_init, x_mark_dec)
            seasonal_part, trend_part = self.decoder(dec_out, enc_out, x_mask=dec_self_mask, cross_mask=dec_enc_mask,
                                                    trend=trend_init)
            # final
            dec_out = trend_part + seasonal_part

            if self.output_attention:
                return dec_out[:, -self.pred_len:, :], attns
            else:
                return dec_out[:, -self.pred_len:, :]


## Run !!

In [59]:
def train(model, optimizer, train_loader, val_loader, device, model_scheduler, scale_max_dict, scale_min_dict):
    model.to(device)
    criterion = nn.MSELoss().to(device)
    best_loss = 9999999
    best_model = None
    
    for epoch in range(1, CFG['EPOCHS']+1):
        model.train()
        train_loss = []
        
        for product_data, batch_x, batch_y, batch_x_mark, batch_y_mark in tqdm(iter(train_loader)):
            product_data = product_data.to(device)
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)
            batch_x_mark = batch_x_mark.to(device)
            batch_y_mark = batch_y_mark.to(device)
            
            # decoder input
            dec_inp = torch.cat([batch_x, torch.zeros_like(batch_y).unsqueeze(-1)], dim=1).to(device)

            # decoder time info
            dec_mark_inp = torch.cat([batch_x_mark, batch_y_mark], dim=1).to(device)

            output = model(product_data, batch_x, batch_x_mark, dec_inp, dec_mark_inp)[0]
            output = output.squeeze(-1)

            batch_y = batch_y[:, -CFG['PREDICT_SIZE']:].to(device)
            optimizer.zero_grad()
            loss = criterion(output, batch_y)
            loss.backward()
            optimizer.step()
            train_loss.append(loss.item())
        
        val_loss = validation(model, val_loader, criterion, device, scale_max_dict, scale_min_dict)
        print(f'Epoch : [{epoch}] Train Loss : [{np.mean(train_loss):.5f}] Val Loss : [{val_loss:.5f}]')
        
        if val_loss < best_loss:
            best_loss = val_loss
            best_model = model
            print('Model Saved')
            
    return best_model

In [60]:
def validation(model, val_loader, criterion, device, scale_max_dict, scale_min_dict):
    model.eval()
    val_loss = []
    
    with torch.no_grad():
        for product_data, batch_x, batch_y, batch_x_mark, batch_y_mark in tqdm(iter(train_loader)):
            product_data = product_data.to(device)
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)
            batch_x_mark = batch_x_mark.to(device)
            batch_y_mark = batch_y_mark.to(device)
            
            # decoder input
            dec_inp = torch.cat([batch_x, torch.zeros_like(batch_y).unsqueeze(-1)], dim=1).to(device)

            # decoder time info
            dec_mark_inp = torch.cat([batch_x_mark, batch_y_mark], dim=1).to(device)

            output = model(product_data, batch_x, batch_x_mark, dec_inp, dec_mark_inp)[0]
            output = output.squeeze(-1)
            
            batch_y = batch_y[:, -CFG['PREDICT_SIZE']:].to(device)
            
            loss = criterion(output, batch_y)
            val_loss.append(loss.item())

    return np.mean(val_loss)

In [61]:
model = Model()

model_scheduler = []
optimizer = torch.optim.AdamW(params = model.parameters(), lr = CFG["LEARNING_RATE"])
infer_model = train(model, optimizer, train_loader, val_loader, device, model_scheduler,scale_max_dict,scale_min_dict)

  0%|          | 0/6 [00:00<?, ?it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 17%|█▋        | 1/6 [00:00<00:03,  1.51it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 33%|███▎      | 2/6 [00:01<00:02,  1.59it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 50%|█████     | 3/6 [00:01<00:01,  1.62it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 67%|██████▋   | 4/6 [00:02<00:01,  1.62it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 83%|████████▎ | 5/6 [00:03<00:00,  1.63it/s]

torch.Size([232, 90, 1])
torch.Size([232, 111, 2])


100%|██████████| 6/6 [00:03<00:00,  1.77it/s]
  0%|          | 0/6 [00:00<?, ?it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 17%|█▋        | 1/6 [00:00<00:02,  2.06it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 33%|███▎      | 2/6 [00:00<00:01,  2.02it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 50%|█████     | 3/6 [00:01<00:01,  1.98it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 67%|██████▋   | 4/6 [00:02<00:01,  1.94it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 83%|████████▎ | 5/6 [00:02<00:00,  1.91it/s]

torch.Size([232, 90, 1])
torch.Size([232, 111, 2])


100%|██████████| 6/6 [00:02<00:00,  2.12it/s]


Epoch : [1] Train Loss : [0.47038] Val Loss : [0.44020]
Model Saved


  0%|          | 0/6 [00:00<?, ?it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 17%|█▋        | 1/6 [00:00<00:03,  1.60it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 33%|███▎      | 2/6 [00:01<00:02,  1.62it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 50%|█████     | 3/6 [00:01<00:01,  1.63it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 67%|██████▋   | 4/6 [00:02<00:01,  1.64it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 83%|████████▎ | 5/6 [00:03<00:00,  1.64it/s]

torch.Size([232, 90, 1])
torch.Size([232, 111, 2])


100%|██████████| 6/6 [00:03<00:00,  1.78it/s]
  0%|          | 0/6 [00:00<?, ?it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 17%|█▋        | 1/6 [00:00<00:02,  1.91it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 33%|███▎      | 2/6 [00:01<00:02,  1.89it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 50%|█████     | 3/6 [00:01<00:01,  1.90it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 67%|██████▋   | 4/6 [00:02<00:01,  1.95it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 83%|████████▎ | 5/6 [00:02<00:00,  1.97it/s]

torch.Size([232, 90, 1])
torch.Size([232, 111, 2])


100%|██████████| 6/6 [00:02<00:00,  2.14it/s]


Epoch : [2] Train Loss : [0.25665] Val Loss : [0.07507]
Model Saved


  0%|          | 0/6 [00:00<?, ?it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 17%|█▋        | 1/6 [00:00<00:03,  1.63it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 33%|███▎      | 2/6 [00:01<00:02,  1.63it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 50%|█████     | 3/6 [00:01<00:01,  1.63it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 67%|██████▋   | 4/6 [00:02<00:01,  1.64it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 83%|████████▎ | 5/6 [00:03<00:00,  1.64it/s]

torch.Size([232, 90, 1])
torch.Size([232, 111, 2])


100%|██████████| 6/6 [00:03<00:00,  1.79it/s]
  0%|          | 0/6 [00:00<?, ?it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 17%|█▋        | 1/6 [00:00<00:03,  1.62it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 33%|███▎      | 2/6 [00:01<00:02,  1.82it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 50%|█████     | 3/6 [00:01<00:01,  1.90it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 67%|██████▋   | 4/6 [00:02<00:01,  1.93it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 83%|████████▎ | 5/6 [00:02<00:00,  1.97it/s]

torch.Size([232, 90, 1])
torch.Size([232, 111, 2])


100%|██████████| 6/6 [00:02<00:00,  2.10it/s]


Epoch : [3] Train Loss : [0.15975] Val Loss : [0.07792]


  0%|          | 0/6 [00:00<?, ?it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 17%|█▋        | 1/6 [00:00<00:03,  1.63it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 33%|███▎      | 2/6 [00:01<00:02,  1.63it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 50%|█████     | 3/6 [00:01<00:01,  1.63it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 67%|██████▋   | 4/6 [00:02<00:01,  1.63it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 83%|████████▎ | 5/6 [00:03<00:00,  1.63it/s]

torch.Size([232, 90, 1])
torch.Size([232, 111, 2])


100%|██████████| 6/6 [00:03<00:00,  1.78it/s]
  0%|          | 0/6 [00:00<?, ?it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 17%|█▋        | 1/6 [00:00<00:02,  2.06it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 33%|███▎      | 2/6 [00:00<00:01,  2.06it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 50%|█████     | 3/6 [00:01<00:01,  2.07it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 67%|██████▋   | 4/6 [00:01<00:00,  2.08it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 83%|████████▎ | 5/6 [00:02<00:00,  2.08it/s]

torch.Size([232, 90, 1])
torch.Size([232, 111, 2])


100%|██████████| 6/6 [00:02<00:00,  2.27it/s]


Epoch : [4] Train Loss : [0.11720] Val Loss : [0.05907]
Model Saved


  0%|          | 0/6 [00:00<?, ?it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 17%|█▋        | 1/6 [00:00<00:03,  1.62it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 33%|███▎      | 2/6 [00:01<00:02,  1.62it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 50%|█████     | 3/6 [00:01<00:01,  1.62it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 67%|██████▋   | 4/6 [00:02<00:01,  1.64it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 83%|████████▎ | 5/6 [00:03<00:00,  1.63it/s]

torch.Size([232, 90, 1])
torch.Size([232, 111, 2])


100%|██████████| 6/6 [00:03<00:00,  1.78it/s]
  0%|          | 0/6 [00:00<?, ?it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 17%|█▋        | 1/6 [00:00<00:02,  2.13it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 33%|███▎      | 2/6 [00:00<00:01,  2.13it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 50%|█████     | 3/6 [00:01<00:01,  2.08it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 67%|██████▋   | 4/6 [00:01<00:00,  2.08it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 83%|████████▎ | 5/6 [00:02<00:00,  2.08it/s]

torch.Size([232, 90, 1])
torch.Size([232, 111, 2])


100%|██████████| 6/6 [00:02<00:00,  2.28it/s]


Epoch : [5] Train Loss : [0.09783] Val Loss : [0.04981]
Model Saved


  0%|          | 0/6 [00:00<?, ?it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 17%|█▋        | 1/6 [00:00<00:03,  1.62it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 33%|███▎      | 2/6 [00:01<00:02,  1.62it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 50%|█████     | 3/6 [00:01<00:01,  1.63it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 67%|██████▋   | 4/6 [00:02<00:01,  1.63it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 83%|████████▎ | 5/6 [00:03<00:00,  1.63it/s]

torch.Size([232, 90, 1])
torch.Size([232, 111, 2])


100%|██████████| 6/6 [00:03<00:00,  1.77it/s]
  0%|          | 0/6 [00:00<?, ?it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 17%|█▋        | 1/6 [00:00<00:02,  2.05it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 33%|███▎      | 2/6 [00:00<00:01,  2.04it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 50%|█████     | 3/6 [00:01<00:01,  2.04it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 67%|██████▋   | 4/6 [00:01<00:00,  2.03it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 83%|████████▎ | 5/6 [00:02<00:00,  2.03it/s]

torch.Size([232, 90, 1])
torch.Size([232, 111, 2])


100%|██████████| 6/6 [00:02<00:00,  2.22it/s]


Epoch : [6] Train Loss : [0.08702] Val Loss : [0.04325]
Model Saved


  0%|          | 0/6 [00:00<?, ?it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 17%|█▋        | 1/6 [00:00<00:03,  1.61it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 33%|███▎      | 2/6 [00:01<00:02,  1.62it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 50%|█████     | 3/6 [00:01<00:01,  1.62it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 67%|██████▋   | 4/6 [00:02<00:01,  1.62it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 83%|████████▎ | 5/6 [00:03<00:00,  1.63it/s]

torch.Size([232, 90, 1])
torch.Size([232, 111, 2])


100%|██████████| 6/6 [00:03<00:00,  1.78it/s]
  0%|          | 0/6 [00:00<?, ?it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 17%|█▋        | 1/6 [00:00<00:02,  2.04it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 33%|███▎      | 2/6 [00:00<00:01,  2.02it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 50%|█████     | 3/6 [00:01<00:01,  2.00it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 67%|██████▋   | 4/6 [00:01<00:00,  2.00it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 83%|████████▎ | 5/6 [00:02<00:00,  2.01it/s]

torch.Size([232, 90, 1])
torch.Size([232, 111, 2])


100%|██████████| 6/6 [00:02<00:00,  2.20it/s]


Epoch : [7] Train Loss : [0.07934] Val Loss : [0.03868]
Model Saved


  0%|          | 0/6 [00:00<?, ?it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 17%|█▋        | 1/6 [00:00<00:03,  1.62it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 33%|███▎      | 2/6 [00:01<00:02,  1.62it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 50%|█████     | 3/6 [00:01<00:01,  1.62it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 67%|██████▋   | 4/6 [00:02<00:01,  1.62it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 83%|████████▎ | 5/6 [00:03<00:00,  1.63it/s]

torch.Size([232, 90, 1])
torch.Size([232, 111, 2])


100%|██████████| 6/6 [00:03<00:00,  1.77it/s]
  0%|          | 0/6 [00:00<?, ?it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 17%|█▋        | 1/6 [00:00<00:02,  2.00it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 33%|███▎      | 2/6 [00:00<00:01,  2.02it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 50%|█████     | 3/6 [00:01<00:01,  2.01it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 67%|██████▋   | 4/6 [00:02<00:01,  1.96it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 83%|████████▎ | 5/6 [00:02<00:00,  1.98it/s]

torch.Size([232, 90, 1])
torch.Size([232, 111, 2])


100%|██████████| 6/6 [00:02<00:00,  2.18it/s]


Epoch : [8] Train Loss : [0.07742] Val Loss : [0.03374]
Model Saved


  0%|          | 0/6 [00:00<?, ?it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 17%|█▋        | 1/6 [00:00<00:03,  1.62it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 33%|███▎      | 2/6 [00:01<00:02,  1.62it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 50%|█████     | 3/6 [00:01<00:01,  1.63it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 67%|██████▋   | 4/6 [00:02<00:01,  1.63it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 83%|████████▎ | 5/6 [00:03<00:00,  1.63it/s]

torch.Size([232, 90, 1])
torch.Size([232, 111, 2])


100%|██████████| 6/6 [00:03<00:00,  1.77it/s]
  0%|          | 0/6 [00:00<?, ?it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 17%|█▋        | 1/6 [00:00<00:02,  2.04it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 33%|███▎      | 2/6 [00:00<00:01,  2.04it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 50%|█████     | 3/6 [00:01<00:01,  2.04it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 67%|██████▋   | 4/6 [00:01<00:00,  2.03it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 83%|████████▎ | 5/6 [00:02<00:00,  2.03it/s]

torch.Size([232, 90, 1])
torch.Size([232, 111, 2])


100%|██████████| 6/6 [00:02<00:00,  2.23it/s]


Epoch : [9] Train Loss : [0.07515] Val Loss : [0.03481]


  0%|          | 0/6 [00:00<?, ?it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 17%|█▋        | 1/6 [00:00<00:03,  1.61it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 33%|███▎      | 2/6 [00:01<00:02,  1.63it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 50%|█████     | 3/6 [00:01<00:01,  1.63it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 67%|██████▋   | 4/6 [00:02<00:01,  1.63it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 83%|████████▎ | 5/6 [00:03<00:00,  1.63it/s]

torch.Size([232, 90, 1])
torch.Size([232, 111, 2])


100%|██████████| 6/6 [00:03<00:00,  1.78it/s]
  0%|          | 0/6 [00:00<?, ?it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 17%|█▋        | 1/6 [00:00<00:02,  2.03it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 33%|███▎      | 2/6 [00:00<00:01,  2.03it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 50%|█████     | 3/6 [00:01<00:01,  2.03it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 67%|██████▋   | 4/6 [00:01<00:00,  2.04it/s]

torch.Size([512, 90, 1])
torch.Size([512, 111, 2])


 83%|████████▎ | 5/6 [00:02<00:00,  2.05it/s]

torch.Size([232, 90, 1])
torch.Size([232, 111, 2])


100%|██████████| 6/6 [00:02<00:00,  2.24it/s]

Epoch : [10] Train Loss : [0.07262] Val Loss : [0.03448]





## 모델 추론

In [62]:
test_dataset = CustomDataset(test_input, None)
test_loader = DataLoader(test_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

NameError: name 'test_input' is not defined

: 

In [None]:
def inference(model, test_loader, device):
    predictions = []
    
    with torch.no_grad():
        for X in tqdm(iter(test_loader)):
            X = X.to(device)
            
            output = model(X)
            
            # 모델 출력인 output을 CPU로 이동하고 numpy 배열로 변환
            output = output.cpu().numpy()
            
            predictions.extend(output)
    
    return np.array(predictions)

: 

In [None]:
pred = inference(infer_model, test_loader, device)

: 

In [None]:
# 추론 결과를 inverse scaling
for idx in range(len(pred)):
    pred[idx, :] = pred[idx, :] * (scale_max_dict[idx] - scale_min_dict[idx]) + scale_min_dict[idx]
    
# 결과 후처리
pred = np.round(pred, 0).astype(int)

: 

In [None]:
pred.shape

: 

## Submission

In [None]:
submit = pd.read_csv('./sample_submission.csv')
submit.head()

: 

In [None]:
submit.iloc[:,1:] = pred
submit.head()

: 

In [None]:
submit.to_csv('./baseline_submit.csv', index=False)

: 