In [None]:
import copy
from pathlib import Path
import warnings

import numpy as np
import pandas as pd
import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
from pytorch_lightning.loggers import TensorBoardLogger
import torch

from pytorch_forecasting import Baseline, TemporalFusionTransformer, TimeSeriesDataSet
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import SMAPE, PoissonLoss, QuantileLoss
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters

from pytorch_forecasting.data import NaNLabelEncoder

import pickle

In [None]:
warnings.filterwarnings("ignore")

In [None]:
# Show the installed NumPy version as this cell's output (environment record).
np.__version__

In [None]:
# Select the compute device: CUDA when PyTorch can see a GPU, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print('Using device:', device)

In [None]:
# Read every column as text; the following cell casts each column to its real type.
imputed_csv_path = '../../data/imputed/Imputed_June23.csv'
df = pd.read_csv(imputed_csv_path, dtype=str)

In [None]:
# Cast the text columns loaded from the CSV to their working types:
# ZipCode stays a string identifier, Date becomes a datetime, and the
# price/volume measures become floats.
df['ZipCode'] = df['ZipCode'].astype(str)
df['Date'] = pd.to_datetime(df['Date'])

float_columns = [
    'ZHVI',
    'MedianSalePrice',
    'MedianListPrice',
    'HomesSold',
    'NewListings',
    'Inventory',
]
for column_name in float_columns:
    df[column_name] = df[column_name].astype(float)

In [None]:
# Derive categorical calendar features from the Date column.
date_parts = df['Date'].dt
df['Month'] = date_parts.month.astype(str).astype("category")
df['Year'] = date_parts.year.astype(str).astype("category")
df['Day'] = date_parts.day.astype(str).astype("category")

# The ZIP code identifies each series, so treat it as a category as well.
df['ZipCode'] = df['ZipCode'].astype("category")

# Integer month counter (year * 12 + month), shifted so the earliest month is 0.
month_counter = date_parts.year * 12 + date_parts.month
df["time_idx"] = month_counter
df["time_idx"] -= df["time_idx"].min()

# Date is fully represented by the derived columns from here on.
df = df.drop(columns='Date')

In [None]:
# Display the prepared frame (typed columns, calendar categories, time_idx) as the cell output.
df

In [None]:
# Forecast horizon and look-back window, both measured in time_idx steps
# (time_idx is the month counter derived from the Date column).
max_prediction_length = 3
max_encoder_length = 24
# Everything after this time index is held out so the last
# `max_prediction_length` steps of each series can be used for validation.
training_cutoff = df["time_idx"].max() - max_prediction_length

training = TimeSeriesDataSet(
    # Split on the time index, NOT on the DataFrame's row index: filtering on
    # `x.index` keeps only the first `training_cutoff + 1` rows of the frame
    # instead of every series' history up to the cutoff.
    df[lambda x: x.time_idx <= training_cutoff],
    time_idx="time_idx",
    target="ZHVI",
    group_ids=["ZipCode"],  # one time series per ZIP code
    min_encoder_length=max_encoder_length // 2,  # keep encoder length long (as it is in the validation set)
    max_encoder_length=max_encoder_length,
    min_prediction_length=1,
    max_prediction_length=max_prediction_length,
    static_categoricals=["ZipCode"],
    time_varying_known_categoricals=["Month", "Year", "Day"],
    time_varying_known_reals=["time_idx"],
    time_varying_unknown_categoricals=[],
    time_varying_unknown_reals=["ZHVI", "MedianSalePrice", "MedianListPrice", "Inventory", "HomesSold", "NewListings"],
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
    # Workaround kept from the original notebook: the default encoding did not
    # behave as expected for ZipCode, so an explicit NaN-aware label encoder is used.
    categorical_encoders={'ZipCode': NaNLabelEncoder(add_nan=True)},
)

# create validation set (predict=True) which means to predict the last max_prediction_length points in time
# for each series
validation = TimeSeriesDataSet.from_dataset(training, df, predict=True, stop_randomization=True)

# create dataloaders for model
batch_size = 64  # set this between 32 to 128
# TODO(review): num_workers=0 loads batches in the main process; check whether
# this limits GPU utilization before raising it.
train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size * 10, num_workers=0)

In [None]:
# Baseline mean absolute error: predict every future value as the last
# available value from the history and compare against the validation targets.
baseline_predictions = Baseline().to(device).predict(val_dataloader)
# Move the targets to whichever device the predictions were returned on, so the
# subtraction below cannot fail with a CPU/GPU device mismatch.
actuals = torch.cat([y for x, (y, weight) in iter(val_dataloader)]).to(baseline_predictions.device)
(actuals - baseline_predictions).abs().mean().item()

In [None]:
# Configure an initial trainer and network with hand-picked hyperparameters.
# In the visible notebook this model is only used to report the parameter count;
# both `trainer` and `tft` are rebuilt later for the actual training run.
pl.seed_everything(42)
trainer = pl.Trainer(
    devices="auto",
    # Match the device-detection cell: use the GPU only when CUDA is available,
    # instead of hard-coding "gpu" and failing on CPU-only machines.
    accelerator="gpu" if torch.cuda.is_available() else "cpu",
    gradient_clip_val=0.1,
)


tft = TemporalFusionTransformer.from_dataset(
    training,
    # not meaningful for finding the learning rate but otherwise very important
    learning_rate=0.095,
    hidden_size=16,  # most important hyperparameter apart from learning rate
    # number of attention heads. Set to up to 4 for large datasets
    attention_head_size=1,
    dropout=0.1,  # between 0.1 and 0.3 are good values
    hidden_continuous_size=8,  # set to <= hidden_size
    output_size=7,  # 7 quantiles by default
    loss=QuantileLoss(),
    # reduce learning rate if no improvement in validation loss after x epochs
    reduce_on_plateau_patience=4,
)
print(f"Number of parameters in network: {tft.size()/1e3:.1f}k")

In [None]:
# Resume from a previous hyperparameter study stored in a pickle file, if any.
file_path = "study.pkl"

# Default to None so that `study` is always defined: when the file is missing or
# unreadable, the tuning cell can still pass `study=study` and start a new study
# instead of raising NameError.
study = None

try:
    # SECURITY NOTE: pickle.load can execute arbitrary code while deserializing;
    # only load study files that were produced by this notebook.
    with open(file_path, 'rb') as file:
        study = pickle.load(file)

    print("Data loaded successfully:")
    print(study)

except FileNotFoundError:
    # Expected on the first run: no study has been saved yet.
    print(f"The file {file_path} was not found.")
except Exception as e:
    # Best effort: report the problem and continue with study=None.
    print(f"An error occurred: {str(e)}")


In [None]:
# Hyperparameter search with `optimize_hyperparameters`, which is already
# imported in the notebook's import cell (no re-import needed here).
# The search stops after `n_trials` trials or when the timeout elapses.
TUNING_TIMEOUT_SECONDS = 3600 * 16  # 16 hours

# progressbar refresh rate is deprecated
# create study
study = optimize_hyperparameters(
    train_dataloader,
    val_dataloader,
    model_path="optuna_test",
    n_trials=250,
    max_epochs=50,
    timeout=TUNING_TIMEOUT_SECONDS,
    gradient_clip_val_range=(0.01, 1.0),
    hidden_size_range=(8, 128),
    hidden_continuous_size_range=(8, 128),
    attention_head_size_range=(2, 4),
    #learning_rate_range=(0.08, .11), # 0.01 => .1
    dropout_range=(0.1, 0.3),
    study=study,  # study object produced by the loading cell, to continue a previous run
    #trainer_kwargs=dict(limit_train_batches=30), https://lightning.ai/docs/pytorch/1.9.3/common/trainer.html
    reduce_on_plateau_patience=4,
    use_learning_rate_finder=True,  # use Optuna to find ideal learning rate or use in-built learning rate finder, :: originally set to False
)

# save study results - also we can resume tuning at a later point in time
with open("study.pkl", "wb") as fout:
    pickle.dump(study, fout)

# show best hyperparameters
print(study.best_trial.params)

In [None]:
# Re-print the best trial's hyperparameters without re-running the search.
best_trial_params = study.best_trial.params
print(best_trial_params)

In [None]:
# Best hyperparameters recorded from a finished tuning run, kept as literals so
# the notebook can be re-run without repeating the search.
best_params = dict(
    gradient_clip_val=0.8875816841642795,
    hidden_size=15,
    dropout=0.26734811072960285,
    hidden_continuous_size=8,
    attention_head_size=2,
    learning_rate=0.10000000000000005,
)

In [None]:
# Configure the trainer and network for the final training run, using the tuned
# hyperparameters stored in `best_params` rather than hand-copied values (the
# hand-copied version used gradient_clip_val=0.1 and a rounded dropout, which
# did not match the tuning result).
pl.seed_everything(42)
trainer = pl.Trainer(
    max_epochs=100,
    devices="auto",
    # Match the device-detection cell: use the GPU only when CUDA is available,
    # instead of hard-coding "gpu" and failing on CPU-only machines.
    accelerator="gpu" if torch.cuda.is_available() else "cpu",
    gradient_clip_val=best_params["gradient_clip_val"],
)

tft = TemporalFusionTransformer.from_dataset(
    training,
    learning_rate=best_params["learning_rate"],
    hidden_size=best_params["hidden_size"],  # most important hyperparameter apart from learning rate
    # number of attention heads. Set to up to 4 for large datasets
    attention_head_size=best_params["attention_head_size"],
    dropout=best_params["dropout"],  # between 0.1 and 0.3 are good values
    hidden_continuous_size=best_params["hidden_continuous_size"],  # set to <= hidden_size
    output_size=7,  # 7 quantiles by default
    loss=QuantileLoss(),
    # reduce learning rate if no improvement in validation loss after x epochs
    reduce_on_plateau_patience=4,
)

In [None]:
# Train the network on the training loader, evaluating on the validation loader.
trainer.fit(model=tft, train_dataloaders=train_dataloader, val_dataloaders=val_dataloader)

In [None]:
# Reload the model from the checkpoint recorded by the trainer's checkpoint callback.
# NOTE(review): the Trainer in this notebook is created without explicit
# early-stopping or checkpoint callbacks, so confirm that `best_model_path`
# really tracks validation loss and is not simply the last saved epoch.
checkpoint_callback = trainer.checkpoint_callback
best_model_path = checkpoint_callback.best_model_path
print(best_model_path)
best_tft = TemporalFusionTransformer.load_from_checkpoint(best_model_path)

### Evaluation

In [None]:
# Calculate the mean absolute error of the reloaded model on the validation set.
predictions = best_tft.predict(val_dataloader)
# Each batch target is indexed with [0] to take the target tensor itself. Move
# the targets to whichever device the predictions were returned on, so the
# subtraction below cannot fail with a CPU/GPU device mismatch.
actuals = torch.cat([y[0] for x, y in iter(val_dataloader)]).to(predictions.device)
(actuals - predictions).abs().mean().item()

In [None]:
# Request raw network output together with the model inputs; the raw output is a
# dictionary from which all kinds of information, including quantiles, can be read.
raw_output = best_tft.predict(val_dataloader, mode="raw", return_x=True)
raw_predictions, x = raw_output

In [None]:
# Plot the forecast against the actuals for the first 20 validation examples.
n_examples = 20
for idx in range(n_examples):
    best_tft.plot_prediction(x, raw_predictions, idx=idx, add_loss_to_title=True)