# Temporal Fusion Transformer - Individual
### Load packages

In [None]:
import lightning.pytorch as pl
import time as time
import pandas as pd
import numpy as np
import torch
import os

from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters
from pytorch_forecasting import Baseline, TimeSeriesDataSet, TemporalFusionTransformer
from pytorch_forecasting.data.encoders import TorchNormalizer
from pytorch_forecasting.metrics import QuantileLoss
from lightning.pytorch.callbacks import EarlyStopping, LearningRateMonitor
from lightning.pytorch.loggers import TensorBoardLogger
from lightning.pytorch.tuner import Tuner

### Load data

In [None]:
Directory = 'C:/.../TFT_for_Stock_Movement_Prediction/data'


def _read_frame(name):
    """Read ``<Directory>/<name>.csv`` with its first column as the index."""
    return pd.read_csv(os.path.join(Directory, name + '.csv'), index_col = [0])


def _read_names(name):
    """Return the values stored in column '0' of ``<name>.csv`` as a list."""
    return _read_frame(name)['0'].tolist()


# Target and return feature
CCR = _read_frame('CCR')

### Features
# Every feature frame is published as a module-level variable named after the
# feature, because dataset() looks the frames up by name through globals().
# The assignment goes through globals() rather than locals(): the mapping
# returned by locals() is documented as not to be modified, and the two only
# coincide at module scope.

## Time features - Categorical
time_features = _read_names('time_features')
for _feature in time_features:
    globals()[_feature] = _read_frame(_feature)

## Basic historical features
bh_features = _read_names('bh_features')
for _feature in bh_features:
    globals()[_feature] = _read_frame(_feature)

# Categorical
bh_categorical_features = _read_names('bh_categorical_features')

# Continuous
bh_continuous_features = _read_names('bh_continuous_features')

## Technical indicators - Continuous
indicator_features = _read_names('indicator_features')
for _feature in indicator_features:
    globals()[_feature] = _read_frame(_feature)

### Model preparation
#### Define variables

In [None]:
# Study periods: ten windows of 1000 observations, each starting 250 rows
# after the previous one (begin and end row positions)
period_b = tuple(range(0, 2500, 250))
period_e = tuple(begin + 1000 for begin in period_b)

# Split of each period: the first `training_size` rows are used for fitting
# (the last `validation_split` share of them for validation), the remaining
# `test_size` rows for testing
training_size, test_size = 750, 250
validation_split = 0.2
training_cutoff = int(training_size - training_size * validation_split)

# Name of the target frame
Target_feature = ['CCR']

# Feature groups handed to TimeSeriesDataSet
Feature_type = ['object']  # dtype applied to categorical feature columns
time_varying_known_categoricals = time_features
time_varying_unknown_categoricals = bh_categorical_features
time_varying_unknown_reals = bh_continuous_features + indicator_features

## Model parameters
# Dataset: encoder window and forecast horizon
max_encoder_length = 258
max_prediction_length = 1

# Data loader
batch_size = 64

# Trainer
max_epochs = 100
gradient_clip_val = 1
log_every_n_steps = 10

# TFT
learning_rate = 0.001
hidden_size, hidden_continuous_size = 50, 25
attention_head_size = 4
dropout = 0.2
quantiles = [0.1, 0.5, 0.9]
output_size = len(quantiles)  # one network output per quantile
loss = QuantileLoss(quantiles = quantiles)

# Early stopping
monitor, mode = 'val_loss', 'min'
min_delta = 0.0001
patience = 10

# Learning rate logger
lr_logger = LearningRateMonitor()

# Learning rate finder search bounds
min_lr, max_lr = 1e-6, 1

# Hyperparameter tuning
batch_size_list = [16, 32, 64, 128]

# File paths to save results
File_name_results = 'results/Individual/Results_Individual.csv'
File_name_results_p90 = 'results/Individual/Results_p90_Individual.csv'
File_name_results_p10 = 'results/Individual/Results_p10_Individual.csv'
File_name_baseline = 'results/Baseline/Baseline.csv'

#### Preparation of datasets

In [None]:
# Datasets for each study period and stock
def dataset(period, stock):
    """Assemble the modelling frame for one study period and one stock.

    Args:
        period: Position in ``period_b`` / ``period_e`` selecting the row
            window of the target frame.
        stock: Column position of the stock in the target frame and in every
            per-stock feature frame.

    Returns:
        A DataFrame indexed by the target frame's index over the window, with
        the columns ``Time_idx``, ``Target`` and ``Stock`` followed by the
        known categoricals, the unknown categoricals and the unknown reals.
    """
    target_frame = globals()[Target_feature[0]]
    window = slice(period_b[period], period_e[period])
    ticker = target_frame.columns[stock]

    data = pd.DataFrame(index = target_frame.index[window])

    # Time_idx is built from the bounds of the first period, so every window
    # is numbered from period_b[0] + 1 to period_e[0] whatever `period` is.
    data['Time_idx'] = range(period_b[0] + 1, period_e[0] + 1)
    data['Target'] = target_frame[[ticker]][window]
    data['Stock'] = ticker

    # Known categoricals: the whole feature frame is assigned, cast to the
    # categorical dtype (presumably a single-column frame - verify the CSVs).
    for name in time_varying_known_categoricals:
        data[name] = globals()[name].astype(Feature_type[0])

    # Unknown categoricals: this stock's column of each feature frame, cast
    # to the categorical dtype.
    for name in time_varying_unknown_categoricals:
        frame = globals()[name]
        data[name] = frame[frame.columns[stock]].astype(Feature_type[0])

    # Unknown reals: this stock's column of each feature frame, unchanged.
    for name in time_varying_unknown_reals:
        frame = globals()[name]
        data[name] = frame[frame.columns[stock]]

    return data

### Model

In [None]:
start = time.time()

# dataset() resolves the target frame through globals(); do the same here, once.
target_frame = globals()[Target_feature[0]]
stock_columns = target_frame.columns

# One row per out-of-sample observation over all study periods, one column per stock.
forecast_index = range(period_b[0], period_e[-1] - training_size)
Results = pd.DataFrame(index = forecast_index, columns = stock_columns)
Results_p90 = pd.DataFrame(index = forecast_index, columns = stock_columns)
Results_p10 = pd.DataFrame(index = forecast_index, columns = stock_columns)
Baseline_results = pd.DataFrame(index = forecast_index, columns = stock_columns)

# Find the quantile positions by value rather than by hard-coded position, so
# they stay correct if `quantiles` is reordered (and fail before any training
# if a quantile is missing).
p10_position = quantiles.index(0.1)
p90_position = quantiles.index(0.9)

# Create the output folders up front so that a missing directory cannot make
# the final save fail after every model has been trained.
for output_path in (File_name_results, File_name_results_p90, File_name_results_p10, File_name_baseline):
    output_folder = os.path.dirname(output_path)
    if output_folder:
        os.makedirs(output_folder, exist_ok = True)

for i, period_start in enumerate(period_b):
    start_period = time.time()
    # Rows of the result frames that receive this period's test predictions.
    forecast_rows = slice(period_start, period_start + test_size)
    for j in range(len(stock_columns)):
        start_stock = time.time()
        data = dataset(i, j)

        #### Create dataframe
        # Training uses time indices up to the cutoff; validation and test
        # reuse the training set's configuration and only predict for time
        # indices after the cutoff / after the training window respectively.
        training = TimeSeriesDataSet(
            data[lambda x: x.Time_idx <= training_cutoff],
            time_idx = data.columns[0],
            target = data.columns[1],
            group_ids = [data.columns[2]],
            max_encoder_length = max_encoder_length,
            max_prediction_length = max_prediction_length,
            time_varying_known_reals = [data.columns[0]],
            time_varying_unknown_reals = [data.columns[1]] + time_varying_unknown_reals,
            time_varying_known_categoricals = time_varying_known_categoricals,
            time_varying_unknown_categoricals = time_varying_unknown_categoricals,
            target_normalizer = TorchNormalizer())
        validation = TimeSeriesDataSet.from_dataset(training, data[lambda x: x.Time_idx <= training_size], min_prediction_idx = training_cutoff + 1)
        test = TimeSeriesDataSet.from_dataset(training, data, min_prediction_idx = training_size + 1)
        train_dataloader = training.to_dataloader(train = True, batch_size = batch_size)
        val_dataloader = validation.to_dataloader(train = False, batch_size = training_size - training_cutoff)

        #### Model architecture
        early_stop_callback = EarlyStopping(monitor = monitor, min_delta = min_delta, verbose = True, patience = patience, mode = mode)
        logger = TensorBoardLogger('logs', name = f'Individual/Period_{i + 1}/Stock_{j + 1}')
        trainer = pl.Trainer(
            max_epochs = max_epochs,
            enable_model_summary = False,
            gradient_clip_val = gradient_clip_val,
            log_every_n_steps = log_every_n_steps,
            callbacks = [lr_logger, early_stop_callback],
            logger = logger)
        tft = TemporalFusionTransformer.from_dataset(
            training,
            learning_rate = learning_rate,
            hidden_size = hidden_size,
            hidden_continuous_size = hidden_continuous_size,
            attention_head_size = attention_head_size,
            dropout = dropout,
            output_size = output_size,
            loss = loss)
        trainer.fit(tft, train_dataloaders = train_dataloader, val_dataloaders = val_dataloader)

        #### Predicting
        # Predict with the checkpoint the trainer recorded as best.
        best_tft = TemporalFusionTransformer.load_from_checkpoint(trainer.checkpoint_callback.best_model_path)

        # Keep the first prediction step of every output. The quantile pass is
        # run once and sliced for both bounds instead of being run twice.
        point_forecast = best_tft.predict(test).detach().cpu().numpy()[:, 0]
        quantile_forecast = best_tft.predict(test, mode = 'quantiles').detach().cpu().numpy()[:, 0]
        baseline_forecast = Baseline().predict(test).detach().cpu().numpy()[:, 0]

        # Plain arrays are written positionally, so the 0-based labels of the
        # predictions can never be aligned against the result frame's index.
        Results.iloc[forecast_rows, j] = point_forecast
        Results_p90.iloc[forecast_rows, j] = quantile_forecast[:, p90_position]
        Results_p10.iloc[forecast_rows, j] = quantile_forecast[:, p10_position]
        Baseline_results.iloc[forecast_rows, j] = baseline_forecast
        print(f'Compilation time - Period {i + 1} - Stock {j + 1}: {round(time.time() - start_stock)} seconds')
    print(f'Compilation time - Period {i + 1}: {round(time.time() - start_period)} seconds')

Results.to_csv(File_name_results)
Results_p90.to_csv(File_name_results_p90)
Results_p10.to_csv(File_name_results_p10)
Baseline_results.to_csv(File_name_baseline)
print(f'Compilation time: {round(time.time() - start)} seconds')

### Learning rate finder

In [None]:
# Create dataframe
# Draw a random study period and stock. The bounds come from the configured
# periods and the target frame's columns rather than fixed counts, so any
# dataset size is sampled in full and never indexed out of range.
n_periods = len(period_b)
n_stocks = len(globals()[Target_feature[0]].columns)
data = dataset(np.random.randint(n_periods), np.random.randint(n_stocks))
training = TimeSeriesDataSet(
    data[lambda x: x.Time_idx <= training_cutoff],
    time_idx = data.columns[0],
    target = data.columns[1],
    group_ids = [data.columns[2]],
    max_encoder_length = max_encoder_length,
    max_prediction_length = max_prediction_length,
    time_varying_known_reals = [data.columns[0]],
    time_varying_unknown_reals = [data.columns[1]] + time_varying_unknown_reals,
    time_varying_known_categoricals = time_varying_known_categoricals,
    time_varying_unknown_categoricals = time_varying_unknown_categoricals,
    target_normalizer = TorchNormalizer())
validation = TimeSeriesDataSet.from_dataset(training, data[lambda x: x.Time_idx <= training_size], min_prediction_idx = training_cutoff + 1)
test = TimeSeriesDataSet.from_dataset(training, data, min_prediction_idx = training_size + 1)
train_dataloader = training.to_dataloader(train = True, batch_size = batch_size)
val_dataloader = validation.to_dataloader(train = False, batch_size = training_size - training_cutoff)

# Run learning rate finder model
trainer = pl.Trainer(gradient_clip_val = gradient_clip_val)
tft = TemporalFusionTransformer.from_dataset(
    training,
    hidden_size = hidden_size,
    hidden_continuous_size = hidden_continuous_size,
    attention_head_size = attention_head_size,
    dropout = dropout,
    output_size = output_size,
    loss = loss)
# Search learning rates between min_lr and max_lr.
res = Tuner(trainer).lr_find(
    tft,
    train_dataloaders = train_dataloader,
    val_dataloaders = val_dataloader,
    max_lr = max_lr,
    min_lr = min_lr)
print(f'suggested learning rate: {res.suggestion()}')
fig = res.plot(show = True, suggest = True)
fig.show()

### Hyperparameter tuning

In [None]:
start = time.time()
list_values, list_parameters = [], []

# Create dataframe
# Draw a random study period and stock. The bounds come from the configured
# periods and the target frame's columns rather than fixed counts, so any
# dataset size is sampled in full and never indexed out of range.
n_periods = len(period_b)
n_stocks = len(globals()[Target_feature[0]].columns)
data = dataset(np.random.randint(n_periods), np.random.randint(n_stocks))
training = TimeSeriesDataSet(
    data[lambda x: x.Time_idx <= training_cutoff],
    time_idx = data.columns[0],
    target = data.columns[1],
    group_ids = [data.columns[2]],
    max_encoder_length = max_encoder_length,
    max_prediction_length = max_prediction_length,
    time_varying_known_reals = [data.columns[0]],
    time_varying_unknown_reals = [data.columns[1]] + time_varying_unknown_reals,
    time_varying_known_categoricals = time_varying_known_categoricals,
    time_varying_unknown_categoricals = time_varying_unknown_categoricals,
    target_normalizer = TorchNormalizer())
validation = TimeSeriesDataSet.from_dataset(training, data[lambda x: x.Time_idx <= training_size], min_prediction_idx = training_cutoff + 1)
test = TimeSeriesDataSet.from_dataset(training, data, min_prediction_idx = training_size + 1)
train_dataloader = training.to_dataloader(train = True, batch_size = batch_size)
val_dataloader = validation.to_dataloader(train = False, batch_size = training_size - training_cutoff)

# Run tuning model
# The epoch budget and the learning rate reuse the shared configuration; the
# learning rate range has equal bounds, so the learning rate itself is held
# fixed during the search.
study = optimize_hyperparameters(
    train_dataloader,
    val_dataloader,
    model_path = 'results/Hyperparameter tuning/Individual',
    log_dir = 'results/Hyperparameter tuning/Individual',
    n_trials = 10,
    max_epochs = max_epochs,
    loss = loss,
    output_size = output_size,
    use_learning_rate_finder = False,
    learning_rate_range = (learning_rate, learning_rate),
    trainer_kwargs = {'log_every_n_steps': log_every_n_steps, 'num_sanity_val_steps': 0})

# Record the objective value and the parameters of the best trial.
list_values.append(study.best_trial.value)
list_parameters.append(study.best_trial.params)
print(f'Compilation time: {round(time.time() - start)} seconds')
print(*list_values, sep = '\n')
print(*list_parameters, sep = '\n')