In [1]:
import random
import os
import pandas as pd
import numpy as np
from tqdm.auto import tqdm
from sklearn.preprocessing import LabelEncoder

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')


  from .autonotebook import tqdm as notebook_tqdm


## hyperparameter

In [2]:
CFG = {
    'TRAIN_WINDOW_SIZE':150, # 90일치로 학습
    'PREDICT_SIZE':21, # 21일치 예측
    'EPOCHS':15,
    'LEARNING_RATE':1e-4,
    'BATCH_SIZE':4096,
    'SEED':41
}

In [3]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(CFG['SEED']) # Seed 고정

In [4]:
train_data = pd.read_csv('./train.csv').drop(columns=['ID', '제품'])

In [5]:
# Data Scaling
scale_max_dict = {}
scale_min_dict = {}

for idx in tqdm(range(len(train_data))):
    maxi = np.max(train_data.iloc[idx,4:])
    mini = np.min(train_data.iloc[idx,4:])
    
    if maxi == mini :
        train_data.iloc[idx,4:] = 0
    else:
        train_data.iloc[idx,4:] = (train_data.iloc[idx,4:] - mini) / (maxi - mini)
    
    scale_max_dict[idx] = maxi
    scale_min_dict[idx] = mini

100%|██████████| 15890/15890 [02:09<00:00, 122.71it/s]


In [6]:
# Label Encoding
label_encoder = LabelEncoder()
categorical_columns = ['대분류', '중분류', '소분류', '브랜드']

for col in categorical_columns:
    label_encoder.fit(train_data[col])
    train_data[col] = label_encoder.transform(train_data[col])

In [7]:
def make_train_data(data, train_size=CFG['TRAIN_WINDOW_SIZE'], predict_size=CFG['PREDICT_SIZE']):
    '''
    학습 기간 블럭, 예측 기간 블럭의 세트로 데이터를 생성
    data : 일별 판매량
    train_size : 학습에 활용할 기간
    predict_size : 추론할 기간
    '''
    num_rows = len(data)
    window_size = train_size + predict_size
    
    input_data = np.empty((num_rows * (len(data.columns) - window_size + 1), train_size, len(data.iloc[0, :4]) + 1))
    target_data = np.empty((num_rows * (len(data.columns) - window_size + 1), predict_size))
    
    for i in tqdm(range(num_rows)):
        encode_info = np.array(data.iloc[i, :4])
        sales_data = np.array(data.iloc[i, 4:])
        
        for j in range(len(sales_data) - window_size + 1):
            window = sales_data[j : j + window_size]
            temp_data = np.column_stack((np.tile(encode_info, (train_size, 1)), window[:train_size]))
            input_data[i * (len(data.columns) - window_size + 1) + j] = temp_data
            target_data[i * (len(data.columns) - window_size + 1) + j] = window[train_size:]
    
    return input_data, target_data

In [8]:
def make_predict_data(data, train_size=CFG['TRAIN_WINDOW_SIZE']):
    '''
    평가 데이터(Test Dataset)를 추론하기 위한 Input 데이터를 생성
    data : 일별 판매량
    train_size : 추론을 위해 필요한 일별 판매량 기간 (= 학습에 활용할 기간)
    '''
    num_rows = len(data)
    
    input_data = np.empty((num_rows, train_size, len(data.iloc[0, :4]) + 1))
    
    for i in tqdm(range(num_rows)):
        encode_info = np.array(data.iloc[i, :4])
        sales_data = np.array(data.iloc[i, -train_size:])
        
        window = sales_data[-train_size : ]
        temp_data = np.column_stack((np.tile(encode_info, (train_size, 1)), window[:train_size]))
        input_data[i] = temp_data
    
    return input_data

In [9]:
train_input, train_target = make_train_data(train_data)
test_input = make_predict_data(train_data)

100%|██████████| 15890/15890 [00:48<00:00, 326.70it/s]
100%|██████████| 15890/15890 [00:10<00:00, 1538.44it/s]


In [10]:
# Train / Validation Split
data_len = len(train_input)
val_input = train_input[-int(data_len*0.2):]
val_target = train_target[-int(data_len*0.2):]
train_input = train_input[:-int(data_len*0.2)]
train_target = train_target[:-int(data_len*0.2)]

train_input.shape, train_target.shape, val_input.shape, val_target.shape, test_input.shape

((3724616, 150, 5),
 (3724616, 21),
 (931154, 150, 5),
 (931154, 21),
 (15890, 150, 5))

## Custom dataset

In [11]:
class CustomDataset(Dataset):
    def __init__(self, X, Y):
        self.X = X
        self.Y = Y
        
    def __getitem__(self, index):
        if self.Y is not None:
            return torch.Tensor(self.X[index]), torch.Tensor(self.Y[index])
        return torch.Tensor(self.X[index])
    
    def __len__(self):
        return len(self.X)

In [12]:
train_dataset = CustomDataset(train_input, train_target)
train_loader = DataLoader(train_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=True, num_workers=0)

val_dataset = CustomDataset(val_input, val_target)
val_loader = DataLoader(val_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

## model

In [13]:
class BaseModel(nn.Module):
    def __init__(self, input_size=5, hidden_size=512, output_size=CFG['PREDICT_SIZE']):
        super(BaseModel, self).__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Sequential(
            nn.Linear(hidden_size, hidden_size//2),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(hidden_size//2, output_size)
        )
            
        self.actv = nn.ReLU()
    
    def forward(self, x):
        # x shape: (B, TRAIN_WINDOW_SIZE, 5)
        batch_size = x.size(0)
        hidden = self.init_hidden(batch_size, x.device)
        
        # LSTM layer
        lstm_out, hidden = self.lstm(x, hidden)
        
        # Only use the last output sequence
        last_output = lstm_out[:, -1, :]
        
        # Fully connected layer
        output = self.actv(self.fc(last_output))
        
        return output.squeeze(1)
    
    def init_hidden(self, batch_size, device):
        # Initialize hidden state and cell state
        return (torch.zeros(1, batch_size, self.hidden_size, device=device),
                torch.zeros(1, batch_size, self.hidden_size, device=device))

In [14]:
def train(model, optimizer, train_loader, val_loader, device):
    model.to(device)
    criterion = nn.MSELoss().to(device)
    best_loss = 9999999
    best_model = None
    
    for epoch in range(1, CFG['EPOCHS']+1):
        model.train()
        train_loss = []
        train_mae = []
        for X, Y in tqdm(iter(train_loader)):
            X = X.to(device)
            Y = Y.to(device)
            
            optimizer.zero_grad()
            
            output = model(X)
            # print("Model Output Shape:", output.shape)
            # print("Target Shape:", Y.shape)
            loss = criterion(output, Y)
            
            loss.backward()
            optimizer.step()
            
            train_loss.append(loss.item())
        
        val_loss = validation(model, val_loader, criterion, device)
        print(f'Epoch : [{epoch}] Train Loss : [{np.mean(train_loss):.5f}] Val Loss : [{val_loss:.5f}]')
        
        if best_loss > val_loss:
            best_loss = val_loss
            best_model = model
            print('Model Saved')
    return best_model

In [15]:
def validation(model, val_loader, criterion, device):
    model.eval()
    val_loss = []
    
    with torch.no_grad():
        for X, Y in tqdm(iter(val_loader)):
            X = X.to(device)
            Y = Y.to(device)
            
            output = model(X)
            loss = criterion(output, Y)
            
            val_loss.append(loss.item())
    return np.mean(val_loss)

In [16]:
model = BaseModel()
optimizer = torch.optim.Adam(params = model.parameters(), lr = CFG["LEARNING_RATE"])
infer_model = train(model, optimizer, train_loader, val_loader, device)

100%|██████████| 910/910 [02:24<00:00,  6.30it/s]
100%|██████████| 228/228 [00:17<00:00, 13.31it/s]


Epoch : [1] Train Loss : [0.02923] Val Loss : [0.02995]
Model Saved


100%|██████████| 910/910 [02:23<00:00,  6.34it/s]
100%|██████████| 228/228 [00:15<00:00, 14.54it/s]


Epoch : [2] Train Loss : [0.02808] Val Loss : [0.02875]
Model Saved


100%|██████████| 910/910 [02:17<00:00,  6.62it/s]
100%|██████████| 228/228 [00:16<00:00, 13.69it/s]


Epoch : [3] Train Loss : [0.02571] Val Loss : [0.03027]


100%|██████████| 910/910 [02:20<00:00,  6.48it/s]
100%|██████████| 228/228 [00:16<00:00, 13.77it/s]


Epoch : [4] Train Loss : [0.02327] Val Loss : [0.02707]
Model Saved


100%|██████████| 910/910 [02:20<00:00,  6.49it/s]
100%|██████████| 228/228 [00:16<00:00, 13.65it/s]


Epoch : [5] Train Loss : [0.01919] Val Loss : [0.01837]
Model Saved


100%|██████████| 910/910 [02:20<00:00,  6.48it/s]
100%|██████████| 228/228 [00:16<00:00, 13.80it/s]


Epoch : [6] Train Loss : [0.01856] Val Loss : [0.02166]


100%|██████████| 910/910 [02:20<00:00,  6.47it/s]
100%|██████████| 228/228 [00:16<00:00, 13.56it/s]


Epoch : [7] Train Loss : [0.01816] Val Loss : [0.02649]


100%|██████████| 910/910 [02:20<00:00,  6.48it/s]
100%|██████████| 228/228 [00:16<00:00, 13.70it/s]


Epoch : [8] Train Loss : [0.01831] Val Loss : [0.01768]
Model Saved


100%|██████████| 910/910 [02:21<00:00,  6.42it/s]
100%|██████████| 228/228 [00:16<00:00, 13.82it/s]


Epoch : [9] Train Loss : [0.01788] Val Loss : [0.01743]
Model Saved


100%|██████████| 910/910 [02:21<00:00,  6.43it/s]
100%|██████████| 228/228 [00:16<00:00, 13.72it/s]


Epoch : [10] Train Loss : [0.01805] Val Loss : [0.01777]


100%|██████████| 910/910 [02:22<00:00,  6.40it/s]
100%|██████████| 228/228 [00:16<00:00, 13.87it/s]


Epoch : [11] Train Loss : [0.01766] Val Loss : [0.01739]
Model Saved


100%|██████████| 910/910 [02:22<00:00,  6.40it/s]
100%|██████████| 228/228 [00:16<00:00, 13.76it/s]


Epoch : [12] Train Loss : [0.01785] Val Loss : [0.01741]


100%|██████████| 910/910 [02:21<00:00,  6.41it/s]
100%|██████████| 228/228 [00:15<00:00, 14.64it/s]


Epoch : [13] Train Loss : [0.01770] Val Loss : [0.03236]


100%|██████████| 910/910 [02:15<00:00,  6.71it/s]
100%|██████████| 228/228 [00:15<00:00, 14.68it/s]


Epoch : [14] Train Loss : [0.01760] Val Loss : [0.01737]
Model Saved


100%|██████████| 910/910 [02:15<00:00,  6.69it/s]
100%|██████████| 228/228 [00:15<00:00, 14.57it/s]

Epoch : [15] Train Loss : [0.01768] Val Loss : [0.01730]
Model Saved





In [17]:
# def PSFA(pred_sell, true_sell, train): 
#     upper_cat = train['대분류'].unique()
#     PSFA = 1
#     for cat in upper_cat:
#         temp_ID_list = train.loc[train['대분류'] == cat, 'ID'].values
#         for day in range(1, 16):
#             total_sell = np.sum(true_sell.loc[true_sell['ID'].isin(temp_ID_list), f'Q{day}'].values) # day 총 판매량
#             pred_values = pred_sell.loc[pred_sell['ID'].isin(temp_ID_list), f'Q{day}'].values # 특정일의 예측 판매량
#             true_values = true_sell.loc[true_sell['ID'].isin(temp_ID_list), f'Q{day}'].values # 특정일 실제 판매량
            
#             # 실제 판매와 예측 판매가 같은 경우 오차가 없는 것으로 간주 
#             denominator = np.maximum(true_values, pred_values)
#             diffs = np.where(denominator!=0, np.abs(true_values - pred_values) / denominator, 0)
            
#             day_sell = true_values.sum(axis=0) # Item별 실제 판매량
#             sell_weights = day_sell / total_sell # Item별 day 총 판매량 내 비중
#             PSFA -= np.sum(diffs * sell_weights)/(15*len(upper_cat))
#             # PSFA_m의 평균이 PSFA이므로,15일 평균 and 대분류별 평균 적용하여 감산
#             # PSFA = 1 - (((오차 * 제품비중의 제품별 가중평균)의 일자별 산술 평균)의 대분류별 산술 평균)
#     return PSFA

## 모델 추론

In [18]:
test_dataset = CustomDataset(test_input, None)
test_loader = DataLoader(test_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

def inference(model, test_loader, device):
    predictions = []
    
    with torch.no_grad():
        for X in tqdm(iter(test_loader)):
            X = X.to(device)
            
            output = model(X)
            
            # 모델 출력인 output을 CPU로 이동하고 numpy 배열로 변환
            output = output.cpu().numpy()
            
            predictions.extend(output)
    
    return np.array(predictions)

pred = inference(infer_model, test_loader, device)

100%|██████████| 4/4 [00:00<00:00, 20.73it/s]


In [19]:
# 추론 결과를 inverse scaling
for idx in range(len(pred)):
    pred[idx, :] = pred[idx, :] * (scale_max_dict[idx] - scale_min_dict[idx]) + scale_min_dict[idx]
    
# 결과 후처리
pred = np.round(pred, 0).astype(int)

In [20]:
pred.shape

(15890, 21)

In [21]:
submit = pd.read_csv('./sample_submission.csv')
submit.head()

Unnamed: 0,ID,2023-04-05,2023-04-06,2023-04-07,2023-04-08,2023-04-09,2023-04-10,2023-04-11,2023-04-12,2023-04-13,...,2023-04-16,2023-04-17,2023-04-18,2023-04-19,2023-04-20,2023-04-21,2023-04-22,2023-04-23,2023-04-24,2023-04-25
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [22]:
submit.iloc[:,1:] = pred
submit.head()

Unnamed: 0,ID,2023-04-05,2023-04-06,2023-04-07,2023-04-08,2023-04-09,2023-04-10,2023-04-11,2023-04-12,2023-04-13,...,2023-04-16,2023-04-17,2023-04-18,2023-04-19,2023-04-20,2023-04-21,2023-04-22,2023-04-23,2023-04-24,2023-04-25
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,1,1,1
3,3,0,0,0,0,0,0,0,0,0,...,0,1,1,1,1,1,1,1,1,1
4,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [24]:
submit.to_csv('./baseline_150.csv', index=False)