## Import

In [None]:
import random
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm.auto import tqdm
from sklearn.preprocessing import LabelEncoder

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

## Hyperparameter Setting

In [None]:
# Central hyperparameter table read by the rest of the notebook.
CFG = dict(
    TRAIN_WINDOW_SIZE=60,  # default input-window length of the windowing helpers
    PREDICT_SIZE=21,       # default number of target steps per window / model outputs
    EPOCHS=10,             # upper bound of the epoch loop in train()
    LEARNING_RATE=1e-4,    # learning rate handed to the Adam optimizer
    BATCH_SIZE=4096,       # batch size of every DataLoader
    SEED=42,               # value passed to seed_everything()
)

In [None]:
def seed_everything(seed):
    """Seed every random number generator used here and make cuDNN deterministic.

    Seeds Python's ``random``, NumPy, and PyTorch (CPU and all CUDA devices),
    exports ``PYTHONHASHSEED``, and configures cuDNN for reproducible runs.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    # NOTE: setting this at runtime does not change hashing in the already
    # running interpreter; it is only inherited by processes started later.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not only the current one (safe without CUDA).
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Benchmark mode lets cuDNN auto-tune and pick different kernels from run
    # to run, which defeats the deterministic flag above, so it must be off.
    torch.backends.cudnn.benchmark = False

# Seed all random number generators with the configured seed.
seed_everything(CFG['SEED'])

### 데이터 불러오기

In [None]:
# Parse the training table once (the original parsed the same file twice) and
# drop the 'ID' and '제품' columns before slicing.
raw_train = pd.read_csv('/train.csv').drop(columns=['ID', '제품'])

# First four remaining columns: the categorical descriptors.
train_cat = raw_train.iloc[:, :4]
# Everything after them except the final 41 columns.
train_num = raw_train.iloc[:, 4:-41]

train_data = pd.concat([train_cat, train_num], axis=1)

# NOTE(review): this read uses the relative path 'train.csv' while the read
# above uses '/train.csv' — confirm both point at the same file.
out_train = pd.read_csv('train.csv')
# Keep only the last seven columns of that table.
out_train = out_train.iloc[:, -7:]

# Append those seven columns to the right of the assembled frame.
train_data = pd.concat([train_data, out_train], axis=1)

In [None]:
# Min-max scale the numeric columns of train_data, one row at a time.
numeric_cols = train_data.columns[4:]

# Minimum and maximum of each ROW (axis=1 reduces across the columns).
min_values = train_data[numeric_cols].min(axis=1)
max_values = train_data[numeric_cols].max(axis=1)

# Per-row span; a span of 0 is replaced by 1 so the division below is safe.
ranges = max_values - min_values
ranges = ranges.mask(ranges == 0, 1)

# Scale every row: (value - row minimum) / row span.
train_data[numeric_cols] = train_data[numeric_cols].sub(min_values, axis=0).div(ranges, axis=0)

# Keep each row's min and max, keyed by row index, for the inverse transform.
scale_min_dict = min_values.to_dict()
scale_max_dict = max_values.to_dict()

In [None]:
# 1. Label-encode the categorical columns, keeping one fitted encoder per column.
categorical_columns = ['대분류', '중분류', '소분류', '브랜드']
label_encoders = {col: LabelEncoder() for col in categorical_columns}

for col, le in label_encoders.items():
    # Replace the raw categories with their integer codes.
    train_data[col] = le.fit_transform(train_data[col]).astype(int)

# 2. Embedding module for the label-encoded categorical columns
class CategoricalEmbedding(nn.Module):
    """Look up one embedding per categorical column and concatenate the results.

    Args:
        input_sizes: Number of distinct ids for each categorical column.
        embedding_dims: Embedding width for each categorical column, in the
            same order as ``input_sizes``.
    """

    def __init__(self, input_sizes, embedding_dims):
        super().__init__()

        # One embedding table per categorical column, in column order.
        tables = []
        for vocab_size, width in zip(input_sizes, embedding_dims):
            tables.append(nn.Embedding(vocab_size, width))
        self.embeddings = nn.ModuleList(tables)

    def forward(self, x):
        """Embed ``x`` of shape [batch_size, num_categorical_features].

        Column ``i`` of ``x`` is looked up in table ``i``; the per-column
        vectors are concatenated along dim 1.
        """
        pieces = []
        for column, table in enumerate(self.embeddings):
            pieces.append(table(x[:, column]))
        return torch.cat(pieces, dim=1)

# Number of ids per categorical column: largest label-encoded value + 1.
input_sizes = [train_data[col].max() + 1 for col in categorical_columns]

# Embedding width per column: floor(sqrt(number of ids) / 2).
embedding_dims = [int(np.sqrt(size) // 2) for size in input_sizes]

# NOTE(review): the embedding weights are used exactly as nn.Embedding
# initialises them; no training step for this module is visible in this file.
model = CategoricalEmbedding(input_sizes, embedding_dims)

# Categorical ids of every row as a LongTensor.
all_data_tensor = torch.tensor(train_data[categorical_columns].values, dtype=torch.long)

# Embed all rows without tracking gradients.
with torch.no_grad():
    all_embedded_values = model(all_data_tensor)

all_embedded_np = all_embedded_values.numpy()

# Name the embedded columns "<column>_<k>", in the order the module concatenates them.
embedded_col_names = [
    f"{col}_{j}"
    for col, dim in zip(categorical_columns, embedding_dims)
    for j in range(dim)
]
embedded_df = pd.DataFrame(all_embedded_np, columns=embedded_col_names)

# Drop the label-encoded columns ...
train_data.drop(columns=categorical_columns, inplace=True)

# ... and put the embedded columns in front of the remaining data.
# NOTE(review): later cells slice at column 33; that only matches when
# sum(embedding_dims) == 33 for this data set — confirm.
train_data = pd.concat([embedded_df, train_data], axis=1)

# Preview the result.
train_data.head()

In [None]:
# Display the column at position 33 (presumably the first non-embedding column — confirm).
train_data.iloc[:, 33]

In [None]:
def make_train_data(data, train_size=None, predict_size=None, step_size=2, num_feature_cols=33):
    """Cut every row of ``data`` into sliding (input, target) windows.

    The first ``num_feature_cols`` columns of ``data`` are per-row features and
    the remaining columns are the series to window. For each row, windows of
    ``train_size + predict_size`` values start at offsets 0, ``step_size``,
    2 * ``step_size``, ... The first ``train_size`` values of a window become
    the last input channel (the row's features are repeated on every step);
    the remaining ``predict_size`` values become the target.

    The output holds exactly one entry per generated window. The window count
    is derived from the series length, so no uninitialised ``np.empty`` rows
    are returned.

    Args:
        data: DataFrame of numeric values laid out as described above.
        train_size: Input window length. ``None`` (default) resolves to
            ``CFG['TRAIN_WINDOW_SIZE']`` at call time.
        predict_size: Target length. ``None`` (default) resolves to
            ``CFG['PREDICT_SIZE']`` at call time.
        step_size: Stride between consecutive window starts.
        num_feature_cols: Number of leading feature columns.

    Returns:
        ``(input_data, target_data)`` with shapes
        ``(rows * windows_per_row, train_size, features + 1)`` and
        ``(rows * windows_per_row, predict_size)``. Samples are ordered row by
        row, then by window start. Both are empty when the series is shorter
        than one window.

    Raises:
        ValueError: If ``train_size`` or ``step_size`` is smaller than 1.
    """
    if train_size is None:
        train_size = CFG['TRAIN_WINDOW_SIZE']
    if predict_size is None:
        predict_size = CFG['PREDICT_SIZE']
    if train_size < 1:
        raise ValueError("train_size must be a positive integer")
    if step_size < 1:
        raise ValueError("step_size must be a positive integer")

    window_size = train_size + predict_size

    # One bulk conversion instead of a pandas row lookup per iteration.
    values = data.to_numpy(dtype=np.float64)
    features = values[:, :num_feature_cols]
    sales = values[:, num_feature_cols:]
    num_rows, num_features = features.shape

    # Count windows from the series length only (feature columns excluded).
    windows_per_row = len(range(0, sales.shape[1] - window_size + 1, step_size))

    input_data = np.empty((num_rows * windows_per_row, train_size, num_features + 1))
    target_data = np.empty((num_rows * windows_per_row, predict_size))
    if input_data.shape[0] == 0:
        return input_data, target_data

    # (rows, windows_per_row, window_size) read-only view of all strided windows.
    windows = np.lib.stride_tricks.sliding_window_view(sales, window_size, axis=1)[:, ::step_size]

    # The reshapes are views of the freshly allocated outputs, so writing
    # through them fills input_data / target_data in place.
    grouped_inputs = input_data.reshape(num_rows, windows_per_row, train_size, num_features + 1)
    grouped_inputs[..., :num_features] = features[:, None, None, :]
    grouped_inputs[..., num_features] = windows[..., :train_size]
    target_data.reshape(num_rows, windows_per_row, predict_size)[...] = windows[..., train_size:]

    return input_data, target_data

In [None]:
def make_predict_data(data, train_size=None, num_feature_cols=33):
    """Build one inference window per row from the most recent values.

    The first ``num_feature_cols`` columns of ``data`` are per-row features.
    For each row, the last ``train_size`` columns form the final input channel
    and the row's features are repeated on every step.

    Args:
        data: DataFrame of numeric values laid out as described above.
        train_size: Input window length. ``None`` (default) resolves to
            ``CFG['TRAIN_WINDOW_SIZE']`` at call time.
        num_feature_cols: Number of leading feature columns.

    Returns:
        Array of shape ``(rows, train_size, features + 1)``.

    Raises:
        ValueError: If ``train_size`` is smaller than 1 or ``data`` has fewer
            than ``train_size`` columns.
    """
    if train_size is None:
        train_size = CFG['TRAIN_WINDOW_SIZE']
    if train_size < 1:
        raise ValueError("train_size must be a positive integer")

    # One bulk conversion instead of a pandas row lookup per iteration.
    values = data.to_numpy(dtype=np.float64)
    if values.shape[1] < train_size:
        raise ValueError("data has fewer columns than train_size")

    features = values[:, :num_feature_cols]
    num_rows, num_features = features.shape

    input_data = np.empty((num_rows, train_size, num_features + 1))
    # Repeat each row's features along the time axis.
    input_data[:, :, :num_features] = features[:, None, :]
    # The last train_size columns are the most recent observations.
    input_data[:, :, num_features] = values[:, -train_size:]

    return input_data

In [None]:
# Build the sliding-window training samples and one inference window per row
# from the same frame, using the helpers' default sizes.
train_input, train_target = make_train_data(train_data)
test_input = make_predict_data(train_data)

In [None]:
# Hold out the final 20% of the windowed samples (in array order) for validation.
data_len = len(train_input)
# Use an explicit split index: with fewer than five samples the validation
# count is 0, and negative slicing ([-0:]) would put everything in validation
# and leave the training set empty.
split_idx = data_len - int(data_len * 0.2)
val_input = train_input[split_idx:]
val_target = train_target[split_idx:]
train_input = train_input[:split_idx]
train_target = train_target[:split_idx]

In [None]:
# Display the shapes of the training, validation and inference arrays.
train_input.shape, train_target.shape, val_input.shape, val_target.shape, test_input.shape

### Custom Dataset

In [None]:
class CustomDataset(Dataset):
    """Dataset over an input array and an optional target array.

    Args:
        X: Indexable collection of input samples.
        Y: Indexable collection of targets aligned with ``X``, or ``None``
            when no targets are available (inference).
    """

    def __init__(self, X, Y):
        self.X = X
        self.Y = Y

    def __len__(self):
        """Number of samples, taken from ``X``."""
        return len(self.X)

    def __getitem__(self, index):
        """Return ``(x, y)`` tensors, or just ``x`` when no targets are stored."""
        features = torch.Tensor(self.X[index])
        if self.Y is None:
            return features
        return features, torch.Tensor(self.Y[index])

In [None]:
# Training batches are shuffled; validation batches keep their order.
train_dataset = CustomDataset(train_input, train_target)
train_loader = DataLoader(
    train_dataset,
    batch_size=CFG['BATCH_SIZE'],
    shuffle=True,
    num_workers=0,
)

val_dataset = CustomDataset(val_input, val_target)
val_loader = DataLoader(
    val_dataset,
    batch_size=CFG['BATCH_SIZE'],
    shuffle=False,
    num_workers=0,
)

### 모델 선언

In [None]:
class Mish(nn.Module):
    """Element-wise activation ``x * tanh(softplus(x))``."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        # Gate in (-1, 1) computed from the input, then applied to the input.
        gate = nn.functional.softplus(x).tanh()
        return x * gate

class StackedLSTMModel(nn.Module):
    """Stacked LSTM whose last time step feeds a two-layer MLP head.

    Args:
        input_size: Number of features per time step.
        hidden_size: LSTM hidden width; the head narrows it to half.
        output_size: Number of values predicted per sample.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout passed to the LSTM (forced to 0 for a single layer).
    """

    def __init__(self, input_size = 34, hidden_size = 1024, output_size = CFG['PREDICT_SIZE'], num_layers = 3, dropout = 0.5):
        super().__init__()

        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # A single-layer LSTM gets dropout 0 (PyTorch warns when dropout is
        # set together with num_layers=1).
        lstm_dropout = 0 if num_layers == 1 else dropout
        self.lstm = nn.LSTM(
            input_size,
            hidden_size,
            num_layers=num_layers,
            dropout=lstm_dropout,
            batch_first=True,
        )

        # Head: hidden -> hidden/2 -> output with a Mish in between.
        half_width = hidden_size // 2
        self.fc = nn.Sequential(
            nn.Linear(hidden_size, half_width),
            Mish(),
            nn.Linear(half_width, output_size),
        )

        # Activation applied to the head's output in forward().
        self.actv = Mish()

    def forward(self, x):
        """Predict from ``x`` of shape (batch, time steps, input_size)."""
        # Start every batch from an all-zero hidden and cell state.
        state = self.init_hidden(x.size(0), x.device)

        sequence_out, _ = self.lstm(x, state)

        # Only the output of the final time step goes into the head.
        final_step = sequence_out[:, -1, :]
        prediction = self.actv(self.fc(final_step))

        # Drops dim 1 only when it has size 1; otherwise a no-op.
        return prediction.squeeze(1)

    def init_hidden(self, batch_size, device):
        """Return zero-filled ``(hidden, cell)`` states on ``device``."""
        shape = (self.num_layers, batch_size, self.hidden_size)
        return (torch.zeros(shape, device=device),
                torch.zeros(shape, device=device))


### 모델 학습

In [None]:
# Model with the class defaults, optimised by Adam at the configured learning rate.
model = StackedLSTMModel()
optimizer = optim.Adam(model.parameters(), lr=CFG["LEARNING_RATE"])
# Learning-rate schedule, currently disabled:
# scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.8)

class EarlyStopping:
    """Signal a stop after ``patience`` consecutive non-improving validation losses.

    Args:
        patience: Number of consecutive non-improving calls that sets
            ``early_stop``.
        verbose: When true, print the counter on every non-improving call.
        delta: Margin added to the best score before comparing; a call counts
            as non-improving when its score is below ``best_score + delta``.
    """

    def __init__(self, patience = 2, verbose=False, delta = 0):
        self.patience = patience
        self.verbose = verbose
        self.delta = delta
        self.counter = 0          # consecutive non-improving calls so far
        self.best_score = None    # best (negated) validation loss seen
        self.early_stop = False   # set once patience is exhausted

    def __call__(self, val_loss, model):
        """Record ``val_loss``; ``model`` is accepted but not used."""
        # Higher score is better, so the loss is negated.
        score = -val_loss

        # The first call only establishes the baseline.
        if self.best_score is None:
            self.best_score = score
            return

        stalled = score < self.best_score + self.delta
        if not stalled:
            self.best_score = score
            self.counter = 0
            return

        self.counter += 1
        if self.verbose:
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
        if self.counter >= self.patience:
            self.early_stop = True

def train(model, optimizer, train_loader, val_loader, device):
    """Train ``model`` with MSE loss and return it with the best weights loaded.

    Runs up to ``CFG['EPOCHS']`` epochs. After each epoch the validation loss
    is computed with ``validation``; whenever it improves, a copy of the
    model's ``state_dict`` is kept. Training stops early after two consecutive
    non-improving epochs. Before returning, the best snapshot is loaded back
    into ``model``, so the result really is the best epoch rather than the
    last one.

    Args:
        model: Module to train; moved to ``device`` in place.
        optimizer: Optimizer built over ``model``'s parameters.
        train_loader: Iterable of ``(X, Y)`` training batches.
        val_loader: Iterable of ``(X, Y)`` validation batches.
        device: Device the batches and the model are moved to.

    Returns:
        ``model`` with the best validation-loss weights loaded. If no epoch
        produced an improvement, the weights are left as training ended.
    """
    model.to(device)
    criterion = nn.MSELoss().to(device)
    best_loss = float('inf')
    best_state = None

    early_stopping = EarlyStopping(patience = 2, verbose = True)

    for epoch in range(1, CFG['EPOCHS']+1):
        model.train()
        train_loss = []
        for X, Y in tqdm(train_loader):
            X = X.to(device)
            Y = Y.to(device)

            optimizer.zero_grad()

            output = model(X)
            loss = criterion(output, Y)

            loss.backward()
            optimizer.step()

            train_loss.append(loss.item())

        val_loss = validation(model, val_loader, criterion, device)
        print(f'Epoch : [{epoch}] Train Loss : [{np.mean(train_loss):.5f}] Val Loss : [{val_loss:.5f}]')

        if best_loss > val_loss:
            best_loss = val_loss
            # Snapshot the weights: keeping a reference to `model` would just
            # alias the object that continues to be trained. The copy lives
            # on the CPU so it does not take up accelerator memory.
            best_state = {
                name: tensor.detach().cpu().clone()
                for name, tensor in model.state_dict().items()
            }
            print('Model Saved')

        early_stopping(val_loss, model)
        if early_stopping.early_stop:
            print("Early stopping")
            break

    # Restore the best epoch's weights.
    if best_state is not None:
        model.load_state_dict(best_state)

    return model

In [None]:
def validation(model, val_loader, criterion, device):
    """Return the mean per-batch loss of ``model`` over ``val_loader``.

    Puts the model in eval mode and runs without gradient tracking.

    Args:
        model: Module to evaluate.
        val_loader: Iterable of ``(X, Y)`` batches.
        criterion: Loss callable taking ``(output, target)``.
        device: Device the batches are moved to.

    Returns:
        ``np.mean`` of the per-batch loss values.
    """
    model.eval()
    batch_losses = []

    with torch.no_grad():
        for inputs, targets in tqdm(iter(val_loader)):
            inputs, targets = inputs.to(device), targets.to(device)
            batch_loss = criterion(model(inputs), targets)
            batch_losses.append(batch_loss.item())

    return np.mean(batch_losses)

## Run !!

In [None]:
# Run the training loop; the returned model is the one used for inference.
infer_model = train(model, optimizer, train_loader, val_loader, device)

## 모델 추론

In [None]:
# Inference data has no targets; batches keep their order so predictions line up with rows.
test_dataset = CustomDataset(test_input, None)
test_loader = DataLoader(
    test_dataset,
    batch_size=CFG['BATCH_SIZE'],
    shuffle=False,
    num_workers=0,
)

In [None]:
def inference(model, test_loader, device):
    """Run ``model`` over ``test_loader`` and return all predictions as one array.

    The model is switched to eval mode first, so dropout is disabled whatever
    mode the caller left the model in.

    Args:
        model: Trained module, already on ``device``.
        test_loader: Iterable of input batches (no targets).
        device: Device the batches are moved to.

    Returns:
        NumPy array of the per-batch outputs concatenated along axis 0, or an
        empty array when the loader yields no batches.
    """
    model.eval()
    batches = []

    with torch.no_grad():
        for X in tqdm(test_loader):
            output = model(X.to(device))
            # Move the batch output to the CPU and convert it to NumPy.
            batches.append(output.cpu().numpy())

    if not batches:
        return np.array([])
    return np.concatenate(batches, axis=0)

In [None]:
# Predict for every window in test_loader using the model returned by train().
pred = inference(infer_model, test_loader, device)

In [None]:
# Undo the row-wise min-max scaling: row idx of pred is mapped back with the
# min / max stored under key idx.
for idx, scaled_row in enumerate(pred):
    row_min = scale_min_dict[idx]
    row_max = scale_max_dict[idx]
    pred[idx, :] = scaled_row * (row_max - row_min) + row_min

# Post-process: round to the nearest whole number and cast to int.
pred = np.round(pred).astype(int)

In [None]:
# Display the shape of the prediction array.
pred.shape

## Submission

In [None]:
# Load the submission template and preview its first rows.
submit = pd.read_csv('/sample_submission.csv')
submit.head()

In [None]:
# Overwrite every column except the first with the predictions, then preview.
submit.iloc[:,1:] = pred
submit.head()

In [None]:
# Write the filled-in submission to CSV without the DataFrame index.
submit.to_csv('/University_of_Ulsan.csv', index=False)