In [3]:
!pip install pytorch-forecasting
import torch
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
import torch.nn as nn
import torch.optim as optim
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import lightning.pytorch as pl
from lightning.pytorch.loggers import TensorBoardLogger
from lightning.pytorch.callbacks import EarlyStopping, LearningRateMonitor, ModelCheckpoint
from lightning.pytorch.tuner import Tuner

from pytorch_forecasting import Baseline, TemporalFusionTransformer, TimeSeriesDataSet
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import MAE, SMAPE, PoissonLoss, QuantileLoss
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters
from pytorch_forecasting.metrics import MultiHorizonMetric

from sklearn.preprocessing import LabelEncoder

from google.colab import drive
drive.mount('/content/drive')

import random
import os
from tqdm.auto import tqdm
import numpy as np
import pandas as pd
import pickle

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
# Every input file sits in the same Drive folder, so each path is built from it.
DATA_DIR = '/content/drive/MyDrive/Colab Notebooks/Data/final'

# Pre-processed train / test tables stored as parquet.
train_df = pd.read_parquet(f'{DATA_DIR}/preprocessed_train_7.parquet')
test_df = pd.read_parquet(f'{DATA_DIR}/preprocessed_test_7.parquet')

# train.csv and the sample submission, read from the same folder.
train_csv = pd.read_csv(f'{DATA_DIR}/train.csv')
sample_submission_csv = pd.read_csv(f'{DATA_DIR}/sample_submission.csv')

In [5]:
# Cast the columns below to fixed dtypes on both frames: the categorical
# features become str, the time index int and the target float.
_COLUMN_DTYPES = {
    'month': str,
    'time_idx': int,
    'sales_rate': float,
    'week_weekend': str,
    'special_day': str,
}
for _frame in (train_df, test_df):
    for _column, _dtype in _COLUMN_DTYPES.items():
        _frame[_column] = _frame[_column].astype(_dtype)

# Window sizes, measured in time_idx steps.
max_prediction_length = 21
min_prediction_length = 21
max_encoder_length = 90
validation_duration = 0

# Training data ends one prediction horizon (plus validation_duration) before
# the last available time index.
_last_time_idx = train_df["time_idx"].max()
_holdout_steps = max_prediction_length + validation_duration
training_cutoff = _last_time_idx - _holdout_steps

# Secondary split: rows whose time_idx is at most floor(last time_idx / 1.15),
# and a cutoff computed exactly like training_cutoff. Neither name is used
# again in this cell.
mid_train_df_2 = train_df[train_df['time_idx'] <= (_last_time_idx // 1.15)]
training_cutoff_2 = _last_time_idx - _holdout_steps

training = TimeSeriesDataSet(
    train_df.loc[train_df["time_idx"] <= training_cutoff],
    time_idx="time_idx",
    target="sales_rate",
    group_ids=["product_nums"],
    # min == max for both windows, so every sample has the same lengths.
    min_encoder_length=max_encoder_length,
    max_encoder_length=max_encoder_length,
    min_prediction_length=min_prediction_length,
    max_prediction_length=max_prediction_length,
    static_categoricals=["major", "middle", "sub", "brand"],
    static_reals=[],
    time_varying_known_categoricals=["month", "week_weekend", "special_day", "day"],
    time_varying_known_reals=["keyword_cnt"],
    time_varying_unknown_categoricals=[],
    time_varying_unknown_reals=[
        "sales_rate",
        "average_month_sales_rate",
        "sales_rate_log",
        "sales",
    ],
    # The target is normalised per product with a softplus transformation.
    target_normalizer=GroupNormalizer(
        groups=["product_nums"],
        transformation="softplus",
        method="standard",
    ),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
)

# Use PSFA_1 when the "synchronized" option is off and PSFA_2 when it is on.
# NOTE(review): presumably this refers to to_dataloader(batch_sampler="synchronized");
# that argument is not passed below - TODO confirm which loss these loaders are meant for.
# 1589 is the per-day item count mentioned in the PSFA_2 loss comments, so this
# batch size is far above the usual 32-128 range on purpose.
batch_size = 1589

# predict=True keeps only the last prediction window of each series for validation.
validation = TimeSeriesDataSet.from_dataset(
    training,
    train_df,
    predict=True,
    stop_randomization=True,
)
train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=12)
val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size, num_workers=12)



In [6]:
# Step 1: unpickle the Optuna study that was saved for the chosen dataset.
# NOTE(review): pickle.load can execute arbitrary code stored in the file;
# only load study files you created yourself.
dataset_name = 'dataset_4'

study_path = f"/content/drive/MyDrive/Colab Notebooks/Kkh/data/optuna_best_parameter/{dataset_name}/best_parameter_study.pkl"
with open(study_path, "rb") as study_file:
    study = pickle.load(study_file)

# Step 2: keep the parameter dict of the best trial; later cells read it when
# building the trainer and the model.
best_hyperparameters = study.best_trial.params

In [7]:
# PSFA_1: meant for runs with the "synchronized" option turned off.
class PSFA_1(MultiHorizonMetric):
    """Relative-error loss weighted by each row's share of its own target sum.

    Every element's relative error ``|target - pred| / max(target, pred)`` is
    weighted by ``target / sum(target over dim 1)`` and scaled by the size of
    dim 1.
    """

    def loss(self, y_pred, target):
        """Return the element-wise weighted relative error.

        Assumes ``target`` and the point prediction are 2-D tensors of the
        same shape, with the forecast horizon on dim 1 - TODO confirm.
        """
        prediction = self.to_prediction(y_pred)
        horizon = prediction.shape[1]

        # Relative error; the epsilon keeps the division finite when both are 0.
        abs_error = (target - prediction).abs()
        scale = torch.maximum(target, prediction) + 1e-8

        # Each row's weights are its targets divided by that row's total.
        row_total = target.sum(dim=1, keepdim=True) + 1e-8
        weight = target / row_total

        return ((abs_error / scale) * weight) * horizon

# PSFA_2: meant for runs with the "synchronized" option turned on.
class PSFA_2(MultiHorizonMetric):
    """Relative-error loss weighted by each element's share of its column sum.

    Every element's relative error ``|target - pred| / max(target, pred)`` is
    weighted by ``target / sum(target over dim 0)`` and scaled by the size of
    dim 0.
    """

    def loss(self, y_pred, target):
        """Return the element-wise weighted relative error.

        Assumes ``target`` and the point prediction are 2-D tensors of the
        same shape, e.g. (1589, 21) = (items, days) - TODO confirm.
        """
        prediction = self.to_prediction(y_pred)
        n_rows = prediction.shape[0]

        # Relative error; the epsilon keeps the division finite when both are 0.
        abs_error = (target - prediction).abs()
        scale = torch.maximum(target, prediction) + 1e-8
        # Up to this point the tensors are (1589, 21).

        # Summing over the rows adds up the 1589 items of the same day,
        # which gives a (1, 21) tensor of per-day totals.
        day_total = target.sum(dim=0, keepdim=True) + 1e-8
        weight = target / day_total

        return ((abs_error / scale) * weight) * n_rows

class SMAPE(MultiHorizonMetric):
    """Element-wise symmetric absolute percentage error.

    NOTE(review): this definition rebinds the name ``SMAPE`` that the notebook
    also imports from ``pytorch_forecasting.metrics``; after this cell runs,
    ``SMAPE`` refers to this class.
    """

    def loss(self, y_pred, target):
        """Return ``|pred - target| / ((|pred| + |target| + 1e-8) / 2)`` per element.

        The leftover debugging ``print`` calls that reported tensor shapes on
        every invocation were removed; the computed value is unchanged.
        """
        y_pred = self.to_prediction(y_pred)
        # The epsilon keeps the denominator non-zero when both values are 0.
        return (y_pred - target).abs() / ((y_pred.abs() + target.abs() + 1e-8) / 2)

In [8]:
# Callbacks: early stopping and checkpointing both watch val_loss (lower is better).
# NOTE(review): with max_epochs=2 below, patience=10 looks unreachable - confirm.
early_stop_callback = EarlyStopping(
    monitor="val_loss",
    min_delta=1e-4,
    patience=10,
    verbose=False,
    mode="min",
)
checkpoint_callback = ModelCheckpoint(
    save_top_k=20,
    monitor="val_loss",
    mode="min",
    dirpath="/content/drive/MyDrive/Colab Notebooks/Kkh/data/optuna_record_checkpoint/final",
    # Plain (non-f) string: the placeholders are filled in by the callback.
    filename="final-{epoch:02d}-{val_loss:.7f}",
)
lr_logger = LearningRateMonitor()  # records the learning rate
logger = TensorBoardLogger("lightning_logs")  # results go to TensorBoard

trainer = pl.Trainer(
    max_epochs=2,
    accelerator="gpu",
    gradient_clip_val=best_hyperparameters["gradient_clip_val"],
    # fast_dev_run=True,  # enable to check the network / dataset for serious bugs
    callbacks=[lr_logger, early_stop_callback, checkpoint_callback],
    logger=logger,
)

# Build the TFT from the training dataset using the tuned hyperparameters.
tft = TemporalFusionTransformer.from_dataset(
    training,
    learning_rate=best_hyperparameters["learning_rate"],
    hidden_size=best_hyperparameters["hidden_size"],
    attention_head_size=best_hyperparameters["attention_head_size"],
    dropout=best_hyperparameters["dropout"],
    hidden_continuous_size=best_hyperparameters["hidden_continuous_size"],
    loss=PSFA_2(),
    log_interval=10,  # log every 10 batches
    optimizer="Ranger",
    reduce_on_plateau_patience=4,
)
print(f"Number of parameters in network: {tft.size()/1e3:.1f}k")

INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
  rank_zero_warn(
  rank_zero_warn(


Number of parameters in network: 607.1k


In [9]:
# Rebind `tft` to a model restored from a saved checkpoint, then move it to
# the device selected at the top of the notebook.
best_model_path = '/content/drive/MyDrive/Colab Notebooks/Data/final/model/230916-nopinfo-psfa_1-epoch=06-val_loss=0.221639-train_loss_epoch=0.252218.ckpt'
tft = TemporalFusionTransformer.load_from_checkpoint(best_model_path)
tft = tft.to(device)

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
  rank_zero_warn(
  rank_zero_warn(


In [None]:
# trainer.fit(
#     tft,
#     train_dataloaders=train_dataloader,
#     val_dataloaders=val_dataloader
# )

INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: 
   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | PSFA_2                          | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 265 K 
3  | prescalers                         | ModuleDict                      | 216   
4  | static_variable_selection          | VariableSelectionNetwork        | 11.4 K
5  | encoder_variable_selection         | VariableSelectionNetwork        | 20.1 K
6  | decoder_variable_selection         | VariableSelectionNetwork        | 7.5 K 
7  | static_context_variable_selection  | GatedResidualNetwork            | 26.7 K
8  | static_context_initia

Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

In [None]:
# WARNING: interrupting the code below even once shuts the session down.
# Run it with care.
optuna_record_path = "/content/drive/MyDrive/Colab Notebooks/Kkh/data/optuna_record_checkpoint/final"
study_path = "/content/drive/MyDrive/Colab Notebooks/Kkh/data/optuna_best_parameter/final/final_parameter.pkl"

# Ranges handed to the hyperparameter search.
_search_ranges = dict(
    gradient_clip_val_range=(0.01, 1.0),
    hidden_size_range=(8, 128),
    hidden_continuous_size_range=(8, 128),
    attention_head_size_range=(1, 4),
    learning_rate_range=(0.001, 0.1),
    dropout_range=(0.1, 0.3),
)

# Create and run the study.
study = optimize_hyperparameters(
    train_dataloader,
    val_dataloader,
    model_path=optuna_record_path,
    # For a quick trial run use n_trials=1 and max_epochs=1 instead.
    n_trials=10,
    max_epochs=50,
    trainer_kwargs=dict(limit_train_batches=50),
    reduce_on_plateau_patience=100,
    # False: let Optuna pick the learning rate instead of the built-in finder.
    use_learning_rate_finder=False,
    **_search_ranges,
)

# Persist the study so that tuning can be resumed at a later point in time.
with open(study_path, "wb") as study_file:
    pickle.dump(study, study_file)

# Show the best hyperparameters found.
print(study.best_trial.params)

[I 2023-09-16 10:44:43,196] A new study created in memory with name: no-name-03c16bc7-faa3-401c-93fe-30eae647ad5a
  gradient_clip_val = trial.suggest_loguniform("gradient_clip_val", *gradient_clip_val_range)
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
  dropout=trial.suggest_uniform("dropout", *dropout_range),
  rank_zero_warn(
  rank_zero_warn(
  model.hparams.learning_rate = trial.suggest_loguniform("learning_rate", *learning_rate_range)
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=50` reached.
[I 2023-09-16 12:12:47,814] Trial 0 finished with valu

{'gradient_clip_val': 0.27356558898834454, 'hidden_size': 54, 'dropout': 0.27502635274983633, 'hidden_continuous_size': 46, 'attention_head_size': 1, 'learning_rate': 0.031475393111542606}


In [None]:
# Step 1: unpickle the Optuna study that was saved for the chosen dataset.
# NOTE(review): pickle.load can execute arbitrary code stored in the file;
# only load study files you created yourself.
dataset_name = 'dataset_6'
test_id = '230810-0'

study_path = f"/content/drive/MyDrive/Colab Notebooks/Kkh/data/optuna_best_parameter/{dataset_name}/best_parameter_study.pkl"
with open(study_path, "rb") as study_file:
    study = pickle.load(study_file)

# Step 2: take the best trial's parameters and display them as the cell output.
best_hyperparameters = study.best_trial.params
best_hyperparameters

{'gradient_clip_val': 0.8369364469185068,
 'hidden_size': 26,
 'dropout': 0.2271421770302028,
 'hidden_continuous_size': 16,
 'attention_head_size': 2,
 'learning_rate': 0.0033604196637294107}

In [None]:
# Step 1: unpickle the Optuna study that was saved for the chosen dataset.
# NOTE(review): pickle.load can execute arbitrary code stored in the file;
# only load study files you created yourself.
dataset_name = 'dataset_4'
test_id = '230809-0'

study_path = f"/content/drive/MyDrive/Colab Notebooks/Kkh/data/optuna_best_parameter/{dataset_name}/best_parameter_study.pkl"
with open(study_path, "rb") as study_file:
    study = pickle.load(study_file)

# Step 2: take the best trial's parameters and display them as the cell output.
best_hyperparameters = study.best_trial.params
best_hyperparameters

{'gradient_clip_val': 0.03737036693440084,
 'hidden_size': 81,
 'dropout': 0.28740217775266835,
 'hidden_continuous_size': 12,
 'attention_head_size': 2,
 'learning_rate': 0.00585230984542428}

# Dataset_2 실험 결과 정리
  1. 데이터셋은 위와 같다.
    * 특이점 : product를 칼럼에서 제외하였다.
