In [None]:
import lightning.pytorch as pl
import numpy as np
import pandas as pd
from lightning.pytorch.callbacks import EarlyStopping, LearningRateMonitor
from pytorch_forecasting import TemporalFusionTransformer, TimeSeriesDataSet, CrossEntropy
from pytorch_forecasting.metrics import QuantileLoss
from sklearn.preprocessing import StandardScaler

In [None]:
from sklearn.mixture import GaussianMixture


def add_features(raw_returns: pd.Series, halflives=(5, 20, 60)) -> pd.DataFrame:
    """Build exponentially weighted return and downside-risk features.

    For every half-life ``hl`` in ``halflives`` three columns are produced:

    * ``ret_{hl}``      - EWM mean of the returns,
    * ``dd-log_{hl}``   - log of the EWM downside deviation (square root of
      the EWM mean of squared negative returns),
    * ``sortino_{hl}``  - EWM mean return divided by the EWM downside deviation.

    Parameters
    ----------
    raw_returns : pd.Series
        Series of returns.
    halflives : iterable of numbers, default (5, 20, 60)
        Half-lives passed to ``Series.ewm(halflife=...)``. The default
        reproduces the original hard-coded feature set.

    Returns
    -------
    pd.DataFrame
        One row per element of ``raw_returns``; columns ordered
        ``ret, dd-log, sortino`` for each half-life in turn.

    Notes
    -----
    While no negative return has been observed yet, the downside deviation is
    exactly 0, so ``dd-log`` is ``-inf`` and ``sortino`` is ``inf``/``NaN`` on
    those rows. Callers that need finite inputs must clean these rows.
    """
    features = {}

    # Only negative returns contribute to downside risk; positives are clipped
    # to 0. This does not depend on the half-life, so compute it once.
    downside_sq = np.minimum(raw_returns, 0.).pow(2)

    for hl in halflives:
        # Feature 1: EWM-ret
        ewm_ret = raw_returns.ewm(halflife=hl).mean()
        # EWM downside deviation (EWM-DD)
        dd = np.sqrt(downside_sq.ewm(halflife=hl).mean())

        features[f'ret_{hl}'] = ewm_ret
        # Feature 2: log(EWM-DD)
        features[f'dd-log_{hl}'] = np.log(dd)
        # Feature 3: EWM-Sortino-ratio = EWM-ret/EWM-DD
        features[f'sortino_{hl}'] = ewm_ret.div(dd)

    return pd.DataFrame(features)


def identify_regimes(df):
    """Cluster the rows of ``df`` into two regimes with a Gaussian mixture.

    The columns of ``df`` are standardised with ``StandardScaler`` and a
    two-component ``GaussianMixture`` (``random_state=42``) is fitted on the
    result.

    Parameters
    ----------
    df : array-like of shape (n_rows, n_features)
        Numeric feature matrix; every column is used for clustering.

    Returns
    -------
    np.ndarray
        One integer component label (0 or 1) per row of ``df``. Which
        component receives which label is decided by the fitted mixture and
        carries no inherent meaning.
    """
    # Standardise so that no feature dominates the mixture through its scale.
    scaled_features = StandardScaler().fit_transform(df)

    # Apply Gaussian Mixture Model
    gmm = GaussianMixture(n_components=2, random_state=42)

    # fit_predict returns exactly one label per input row, so the result
    # already has length len(df) and needs no padding.
    return np.asarray(gmm.fit_predict(scaled_features))


# Load the raw series; 'date' is parsed into datetimes.
df = pd.read_csv('sp500.csv', parse_dates=['date'])
returns = df['returns']

# The regime labels are fitted on the engineered features plus the raw
# returns only, so they must be computed before any other column is attached.
data = add_features(returns)
data['returns'] = returns
data['regime'] = identify_regimes(data).astype(str)

# Attach the remaining columns: calendar date, price and a constant group id.
data = data.assign(
    date=df['date'],
    price=df['close'],
    fin_type='sp500',
)

# add time index
data["time_idx"] = data.index
data

In [None]:
max_prediction_length = 30
max_encoder_length = 24
# Rows before this position form the training set.
training_cutoff = 5808

training = TimeSeriesDataSet(
    # Use the named cutoff rather than repeating the literal, so the split
    # cannot drift apart from the value used by later cells.
    data[:training_cutoff],
    time_idx="time_idx",
    target="regime",
    group_ids=["fin_type"],
    min_encoder_length=max_encoder_length // 2,
    max_encoder_length=max_encoder_length,
    min_prediction_length=1,
    max_prediction_length=max_prediction_length,
    static_categoricals=["fin_type"],
    # Only variables whose future values are genuinely known belong here.
    # Returns over the prediction window are not known in advance, and the
    # regime labels are derived from them, so listing "returns" as known
    # would leak future information into the decoder.
    time_varying_known_reals=["time_idx"],
    time_varying_unknown_categoricals=['regime'],
    time_varying_unknown_reals=[
        "returns",
        "ret_5",
        "dd-log_5",
        "sortino_5",
        "ret_20",
        "dd-log_20",
        "sortino_20",
        "ret_60",
        "dd-log_60",
        "sortino_60",
    ],
    add_encoder_length=True,
)

# create a validation set (predict=True) which means to predict the last max_prediction_length points in time
# for each series
validation = TimeSeriesDataSet.from_dataset(
    training, data, predict=True, stop_randomization=True
)

# create dataloaders for model
batch_size = 128  # set this between 32 to 128
train_dataloader = training.to_dataloader(
    train=True, batch_size=batch_size, num_workers=0
)
val_dataloader = validation.to_dataloader(
    train=False, batch_size=batch_size * 10, num_workers=0
)

In [None]:
# configure the network and trainer used for the learning-rate search
pl.seed_everything(42)
trainer = pl.Trainer(
    accelerator="cpu",
    gradient_clip_val=0.1,
)

tft = TemporalFusionTransformer.from_dataset(
    training,
    # not meaningful for finding the learning rate but otherwise very important
    learning_rate=0.03,
    hidden_size=8,  # most important hyperparameter apart from learning rate
    # number of attention heads. Set to up to 4 for large datasets
    attention_head_size=2,
    dropout=0.1,  # between 0.1 and 0.3 are good values
    hidden_continuous_size=8,  # set to <= hidden_size
    # The target "regime" is categorical, so search the learning rate with the
    # same classification loss that the training model uses; a quantile
    # (regression) loss does not fit class labels.
    loss=CrossEntropy(),
    optimizer="ranger",
    # reduce learning rate if no improvement in validation loss after x epochs
    # reduce_on_plateau_patience=1000,
)
print(f"Number of parameters in network: {tft.size() / 1e3:.1f}k")

In [None]:
# Learning-rate range test: sweep the learning rate between min_lr and max_lr
# and report the value suggested by the tuner.
from lightning.pytorch.tuner import Tuner

tuner = Tuner(trainer)
res = tuner.lr_find(
    tft,
    train_dataloaders=train_dataloader,
    val_dataloaders=val_dataloader,
    min_lr=1e-6,
    max_lr=10.0,
)

suggested_lr = res.suggestion()
print(f"suggested learning rate: {suggested_lr}")

# Plot the sweep with the suggested point marked.
fig = res.plot(show=True, suggest=True)
fig.show()

In [None]:
# Training setup: callbacks, trainer and the classification TFT.
lr_logger = LearningRateMonitor()  # log the learning rate
# Stop once val_loss has not improved by at least 1e-4 for 10 validation checks.
early_stop_callback = EarlyStopping(
    monitor="val_loss",
    mode="min",
    min_delta=1e-4,
    patience=10,
    verbose=False,
)

trainer = pl.Trainer(
    accelerator="cpu",
    max_epochs=50,
    gradient_clip_val=0.1,
    limit_train_batches=50,  # cap every epoch at 50 training batches
    enable_model_summary=True,
    # fast_dev_run=True,  # enable to check that the network or dataset has no serious bugs
    callbacks=[lr_logger, early_stop_callback],
)

tft = TemporalFusionTransformer.from_dataset(
    training,
    loss=CrossEntropy(),  # classification loss for the categorical "regime" target
    optimizer="ranger",
    learning_rate=0.0158,
    reduce_on_plateau_patience=4,
    hidden_size=16,
    hidden_continuous_size=8,
    attention_head_size=2,
    dropout=0.1,
    log_interval=10,  # log every 10 batches
)
print(f"Number of parameters in network: {tft.size() / 1e3:.1f}k")

In [None]:
# Train the TFT on the training loader, validating on the validation loader.
trainer.fit(tft, train_dataloaders=train_dataloader, val_dataloaders=val_dataloader)

In [None]:
# Raw network output on the validation loader, together with the targets.
predictions = tft.predict(
    val_dataloader,
    mode="raw",
    return_y=True,
    trainer_kwargs={"accelerator": "cpu"},
)

In [None]:
all_predictions = []
all_predictions_smoothed = []

# Walk forward in steps of 30 rows. With predict=True each dataset contains
# only the last prediction window of data[:t].
# NOTE: `validation` and `val_dataloader` are rebound on every iteration and
# therefore replace the objects created in the dataset cell.
for t in range(training_cutoff, 11010, 30):
    validation = TimeSeriesDataSet.from_dataset(
        training, data[:t], predict=True, stop_randomization=True
    )

    val_dataloader = validation.to_dataloader(
        train=False, batch_size=batch_size * 10, num_workers=0
    )

    result = tft.predict(val_dataloader, return_y=True, trainer_kwargs=dict(accelerator="cpu"))
    # `.output` is the model's prediction. The previous `.y[0]` is the
    # ground-truth target of the window, so the "predictions" collected here
    # were actually the true regimes.
    pred = result.output.detach().cpu().numpy()
    all_predictions.append(pred)
    # Majority vote over the window: regime 1 if more than 15 steps are 1.
    all_predictions_smoothed.append(1 if pred.sum() > 15 else 0)

predictions = np.array(all_predictions).flatten()
predictions_smoothed = np.array(all_predictions_smoothed)

In [None]:
from evaluation import plot_regimes, evaluate_strategy
import matplotlib.pyplot as plt
import seaborn as sns

# Evaluation window: rows training_cutoff .. eval_end - 1 of the raw frame.
eval_end = 11012
n_eval = eval_end - training_cutoff
# Largest shift (in rows) applied to the regime signal below.
max_delta = 16
eval_returns = df['returns'].values[training_cutoff:eval_end]

# Expand each window-level label to one value per row of its 30-row window.
regimes_plotted = np.array(all_predictions_smoothed).repeat(30)

plot_regimes(
    df['date'].values[training_cutoff:eval_end],
    df['close'].values[training_cutoff:eval_end],
    regimes_plotted[max_delta:n_eval + max_delta],
)

# Evaluate the strategy for every shift of the regime signal from 0 to max_delta.
for d in range(0, max_delta + 1):
    print('Delta ' + str(d))
    evaluate_strategy(eval_returns, regimes_plotted[d:n_eval + d], 1)
    print(' ')
    print(' ')

sns.set_theme(context="paper", style='whitegrid')

# `d` still holds the last loop value (max_delta) here.
evaluate_strategy(eval_returns, regimes_plotted[d:n_eval + d], 1)

# Unshifted signal, evaluated with 0 as the last argument.
evaluate_strategy(eval_returns, regimes_plotted[:n_eval], 0)

In [None]:
# `delta` was never defined in the notebook, so this cell raised NameError on
# a fresh kernel. It plays the same role as the shift `d` in the previous cell.
# NOTE(review): 16 mirrors the largest shift evaluated there - confirm the
# intended value.
delta = 16

# Use the flattened per-step `predictions` array (174 windows * 30 steps =
# 5220 values), which matches the 5220 returns selected here. The raw
# `all_predictions` list holds one entry per window and does not line up
# with the returns.
evaluate_strategy(df['returns'].values[training_cutoff - delta:11028 - delta], predictions, 1)