In [1]:
!pip install pytorch-forecasting
import torch
# Pick the compute device once: use the GPU when CUDA is available, otherwise the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
import torch.nn as nn
import torch.optim as optim
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import lightning.pytorch as pl
from lightning.pytorch.loggers import TensorBoardLogger
from lightning.pytorch.callbacks import EarlyStopping, LearningRateMonitor, ModelCheckpoint
from lightning.pytorch.tuner import Tuner

from pytorch_forecasting import Baseline, TemporalFusionTransformer, TimeSeriesDataSet
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import MAE, SMAPE, PoissonLoss, QuantileLoss
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters
from pytorch_forecasting.metrics import MultiHorizonMetric

from sklearn.preprocessing import LabelEncoder
from pytorch_forecasting.metrics import MultiHorizonMetric

from google.colab import drive
# Mount Google Drive at /content/drive; the data, study and submission paths used
# later in this notebook all live under /content/drive/MyDrive.
drive.mount('/content/drive')

import random
import os
from tqdm.auto import tqdm
import numpy as np
import pandas as pd
import pickle

Collecting pytorch-forecasting
  Downloading pytorch_forecasting-1.0.0-py3-none-any.whl (140 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m140.4/140.4 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting fastapi>=0.80 (from pytorch-forecasting)
  Downloading fastapi-0.103.1-py3-none-any.whl (66 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.2/66.2 kB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting lightning<3.0.0,>=2.0.0 (from pytorch-forecasting)
  Downloading lightning-2.0.9-py3-none-any.whl (1.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m36.0 MB/s[0m eta [36m0:00:00[0m
Collecting optuna<4.0.0,>=3.1.0 (from pytorch-forecasting)
  Downloading optuna-3.3.0-py3-none-any.whl (404 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m404.2/404.2 kB[0m [31m43.7 MB/s[0m eta [36m0:00:00[0m
Collecting pytorch-optimizer<3.0.0,>=2.5.1 (from pytorch-forecasting)
  D

In [None]:
# Read the preprocessed train and test frames (the `_7` files) from the mounted Drive.
train_df = pd.read_parquet('/content/drive/MyDrive/Colab Notebooks/Data/final/preprocessed_train_7.parquet')
test_df = pd.read_parquet("/content/drive/MyDrive/Colab Notebooks/Data/final/preprocessed_test_7.parquet")

# Append the test rows below the train rows and renumber the index 0..n-1.
# From here on `train_df` holds BOTH splits; `test_df` still holds the test rows alone.
train_df = pd.concat((train_df, test_df), ignore_index=True)

In [None]:
# Reference tutorial: https://pytorch-forecasting.readthedocs.io/en/latest/tutorials/stallion.html
# Column -> dtype table. The label-like columns are cast to str, the time index to int
# and the target to float; the casts are applied one column at a time, in this order.
_column_dtypes = {
    'month': str,
    'time_idx': int,
    'sales_rate': float,
    'week_weekend': str,
    'special_day': str,
    'product_info_label': str,
    'day': str,
}
for _column, _dtype in _column_dtypes.items():
    train_df[_column] = train_df[_column].astype(_dtype)

# Forecast horizon (decoder length) and look-back window (encoder length), counted in
# `time_idx` steps. Min and max are set to the same value for both.
max_prediction_length = min_prediction_length = 21
max_encoder_length = min_encoder_length = 90

# Samples per batch, and the number of extra steps held back when computing the cutoff.
batch_size = 1605
validation_duration = 0

print("Load Optimized Parameter")
# The message below says: "Parameter optimisation is not finalised yet; these values are
# based on dataset 4."
print("아직 파라미터 최적화는 확실하게 되지는 않았습니다. 데이터셋 4번을 기준으로 하고 있습니다.")

# Path of the pickled hyper-parameter study (presumably an Optuna study, given the
# directory name and the `best_trial` attribute read below).
study_path = f"/content/drive/MyDrive/Colab Notebooks/Kkh/data/optuna_best_parameter/dataset_4/best_parameter_study.pkl"

# NOTE: unpickling can execute arbitrary code; only load study files you produced yourself.
with open(study_path, "rb") as study_file:
    study = pickle.load(study_file)

# Parameter dict of the best trial recorded in the study.
best_hyperparameters = study.best_trial.params
print(best_hyperparameters)

print("Make Time Series Dataset")

# psfa_1: for use with the `synchronized` batch-sampler option turned OFF.
class PSFA_1(MultiHorizonMetric):
    """Weighted relative-error loss with weights normalised along axis 1 of ``target``.

    Each element's relative error ``|target - pred| / max(target, pred)`` is weighted by
    that element's share of its row total in ``target``.
    """

    def loss(self, y_pred, target):
        """Return the element-wise (unreduced) loss.

        Args:
            y_pred: raw network output; converted with ``self.to_prediction``.
            target: actual values. Assumed 2-D with the same shape as the converted
                prediction (rows x horizon steps) — TODO confirm against the dataloader.

        Returns:
            Tensor of per-element losses, same shape as the converted prediction.
        """
        point_forecast = self.to_prediction(y_pred)
        n_rows, n_cols = point_forecast.shape[0], point_forecast.shape[1]

        # Relative error; the epsilon keeps the division finite when both values are 0.
        abs_error = torch.abs(target - point_forecast)
        larger_value = torch.max(target, point_forecast) + 1e-8
        relative_error = abs_error / larger_value

        # Each element's share of its own row total (one total per row).
        row_totals = torch.sum(target, axis=1).view(n_rows, 1) + 1e-8
        row_share = target / row_totals

        # Scale by the number of columns.
        return (relative_error * row_share) * n_cols

# psfa_2: for use with the `synchronized` batch-sampler option turned ON.
class PSFA_2(MultiHorizonMetric):
    """Weighted relative-error loss with weights normalised along axis 0 of ``target``.

    Each element's relative error ``|target - pred| / max(target, pred)`` is weighted by
    that element's share of its column total in ``target``.
    """

    def loss(self, y_pred, target):
        """Return the element-wise (unreduced) loss.

        Args:
            y_pred: raw network output; converted with ``self.to_prediction``.
            target: actual values. Assumed 2-D with the same shape as the converted
                prediction — the author's notes mention (1589, 21), i.e. items x days;
                TODO confirm.

        Returns:
            Tensor of per-element losses, same shape as the converted prediction.
        """
        point_forecast = self.to_prediction(y_pred)
        n_rows, n_cols = point_forecast.shape[0], point_forecast.shape[1]

        # Relative error; the epsilon keeps the division finite when both values are 0.
        abs_error = torch.abs(target - point_forecast)
        larger_value = torch.max(target, point_forecast) + 1e-8
        relative_error = abs_error / larger_value

        # Summing over rows adds up all items for the same step, giving one total per
        # column with shape (1, n_cols).
        column_totals = torch.sum(target, axis=0).view(1, n_cols) + 1e-8
        # Broadcasts back to the full (n_rows, n_cols) shape.
        column_share = target / column_totals

        # Scale by the number of rows.
        return (relative_error * column_share) * n_rows

# NOTE: this definition shadows the `SMAPE` imported from pytorch_forecasting.metrics
# earlier in the notebook; the name is kept because other code may refer to it.
class SMAPE(MultiHorizonMetric):
    """Symmetric mean absolute percentage error, returned element-wise (unreduced)."""

    def loss(self, y_pred, target):
        """Return ``|pred - target| / ((|pred| + |target| + 1e-8) / 2)`` per element.

        Args:
            y_pred: raw network output; converted with ``self.to_prediction``.
            target: actual values, broadcastable against the converted prediction.

        Returns:
            Tensor of per-element SMAPE values.
        """
        y_pred = self.to_prediction(y_pred)
        # The epsilon keeps the denominator non-zero when prediction and target are both 0.
        # The shape-debugging print calls were removed: they ran on every loss evaluation.
        return (y_pred - target).abs() / ((y_pred.abs() + target.abs() + 1e-8) / 2)


# Last time index before the final forecast window (plus any validation span).
# NOTE(review): in the visible cells this value is computed but never applied — the
# dataset below is built from the full `train_df`, so rows of the final window also feed
# the categorical encoders and the target normalizer. The stallion tutorial filters the
# frame with `time_idx <= training_cutoff`; confirm which behaviour is intended.
training_cutoff = train_df["time_idx"].max() - (max_prediction_length + validation_duration)

training = TimeSeriesDataSet(
    train_df,
    time_idx="time_idx",
    target="sales_rate",
    # One series per product.
    group_ids=['product_nums'],
    # Use the dedicated min/max constants (the minimum previously reused the max constant).
    min_encoder_length=min_encoder_length,
    max_encoder_length=max_encoder_length,
    min_prediction_length=min_prediction_length,
    max_prediction_length=max_prediction_length,
    static_categoricals=["major", "middle", 'sub', 'brand', 'shop'],
    static_reals=[],
    time_varying_known_categoricals=['month', 'week_weekend', 'special_day', 'day'],
    time_varying_known_reals=["keyword_cnt"],
    time_varying_unknown_categoricals=[],
    time_varying_unknown_reals=[
        'sales_rate', "average_month_sales_rate", 'sales_rate_log', 'sales'],
    # Per-product standard scaling of the target with a softplus transformation.
    target_normalizer=GroupNormalizer(groups=["product_nums"], transformation='softplus', method="standard"),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
)

Load Optimized Parameter
아직 파라미터 최적화는 확실하게 되지는 않았습니다. 데이터셋 4번을 기준으로 하고 있습니다.
{'gradient_clip_val': 0.03737036693440084, 'hidden_size': 81, 'dropout': 0.28740217775266835, 'hidden_continuous_size': 12, 'attention_head_size': 2, 'learning_rate': 0.00585230984542428}
Make Time Series Dataset


In [None]:
# Loss / sampler pairing: with the `synchronized` batch sampler off use PSFA_1,
# with it on use PSFA_2.
# NOTE(review): 1605 is presumably chosen so that one synchronized batch can hold every
# product series — confirm against the number of groups in the data.
batch_size = 1605

# Prediction dataset reusing the configuration of `training`; predict=True asks for the
# final prediction window of each series.
validation = TimeSeriesDataSet.from_dataset(training, train_df, predict=True, stop_randomization=True)
train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=12, batch_sampler='synchronized')
val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size, num_workers=12, batch_sampler='synchronized')

# Checkpoint (.ckpt) path of the best-performing model. This must be filled in before
# the cell is run; the original line had no value at all, which was a SyntaxError.
best_model_path = None  # TODO: set to the best model's .ckpt path
if not best_model_path:
    raise ValueError("best_model_path is not set; assign the .ckpt path of the best model before running this cell.")
best_tft = TemporalFusionTransformer.load_from_checkpoint(best_model_path).to(device)

In [None]:
# Predict on the same device the model was moved to, instead of always demanding a GPU
# (a hard-coded "gpu" accelerator fails on a CPU-only runtime).
accelerator = "gpu" if device == "cuda" else "cpu"
predictions = best_tft.predict(val_dataloader, trainer_kwargs=dict(accelerator=accelerator), return_x=True, return_y=True)

sample_submission_csv = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Data/sample_submission.csv')

# Round to the nearest integer, clamp negatives to zero, then move to a NumPy-backed frame.
rounded = F.relu(torch.round(predictions.output)).detach().cpu().numpy()
submission = pd.DataFrame(rounded).astype(int)

# pd.concat(axis=1) aligns on the index and would silently pad with NaN if the row
# counts differed, so fail loudly instead.
if len(submission) != len(sample_submission_csv):
    raise ValueError(
        f"Prediction rows ({len(submission)}) do not match sample submission rows ({len(sample_submission_csv)})."
    )

# NOTE(review): rows are paired with IDs purely by position — confirm that the prediction
# order matches the ID order of the sample submission.
submission = pd.concat([sample_submission_csv['ID'], submission], axis=1)
submission.columns = sample_submission_csv.columns
submission.to_csv("/content/drive/MyDrive/Colab Notebooks/Kkh/data/submission/submission_230901-336416.csv", index=False)