In [None]:
import logging

import pandas as pd

from torch.utils.data import DataLoader

# Use pytorch_lightning instead of lightning.pytorch because Optuna uses pytorch_lightning,
# and using lightning.pytorch produces an error when the packages are imported.
import pytorch_lightning as pl
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning import seed_everything

# Uses leave=False by default; I find the TQDM progress bar harder to configure.
from pytorch_lightning.callbacks import RichProgressBar

# Silence the INFO-level hardware messages by raising two Lightning loggers
# to WARNING.
# NOTE(review): both loggers are reached through pytorch_lightning internals
# (`_log` is a private attribute) -- presumably tied to the installed version;
# confirm when upgrading.
quiet_level = logging.WARNING
pl.utilities.distributed.log.setLevel(quiet_level)
pl.accelerators.gpu._log.setLevel(quiet_level)

from dataset.sequence_to_class import SeqToClassDataset

from common.log_scaler import LogScaler

from transformer.lit_transformer import LitTransformer


In [None]:
# --- Data source --------------------------------------------------------------
DATA_PATH = "../data/btc.csv"
INDEX_COL = "timestamp"  # column read as the DataFrame index

# --- Split boundaries ---------------------------------------------------------
# Rows with an index >= TEST_START form the held-out split.
# NOTE(review): VAL_START is presumably the start of a validation period; it is
# not referenced by the cells visible here -- confirm it is still needed.
VAL_START = pd.to_datetime("2022-08-01 00:00:00")
TEST_START = pd.to_datetime("2022-12-20 00:00:00")

# --- Columns ------------------------------------------------------------------
TARGETS = ["market-price"]

# Input columns. The order is significant wherever this list is used to select
# columns, so it is kept exactly as originally written.
FEATURES = [
    "avg-block-size",
    "avg-confirmation-time",
    "blocks-size",
    "cost-per-transaction",
    "difficulty",
    "estimated-transaction-volume-usd",
    "estimated-transaction-volume",
    "fees-usd-per-transaction",
    "hash-rate",
    "median-confirmation-time",
    "mempool-count",
    "mempool-growth",
    "mempool-size",
    "n-payments-per-block",
    "n-payments",
    "n-transactions-excluding-popular",
    "n-transactions-per-block",
    "n-transactions-total",
    "n-transactions",
    "n-unique-addresses",
    "output-volume",
    "total-bitcoins",
    "trade-volume",
    "transaction-fees-usd",
    "transaction-fees",
    "utxo-count",
]


In [None]:
# Read the CSV at DATA_PATH with INDEX_COL as the index, then convert that
# index to datetimes so it can be compared against TEST_START below.
df = pd.read_csv(DATA_PATH, index_col=INDEX_COL)
df.index = pd.to_datetime(df.index)

# Drop the first and the last row, then fill missing values with pandas'
# default interpolation.
# NOTE(review): the reason for trimming both end rows is not visible here --
# presumably incomplete records; confirm.
df = df.iloc[1:-1].interpolate()

# Scale the whole frame, then discard any rows that still contain NaN.
# NOTE(review): the scaler is constructed from and fitted on the full frame,
# i.e. including rows from TEST_START onward. Whether that leaks information
# depends on what LogScaler learns in fit -- confirm.
scaler = LogScaler(df)
df = scaler.fit_transform(df).copy().dropna()

# Chronological split at TEST_START: earlier rows train, the rest is held out.
df_test = df.loc[df.index >= TEST_START].copy()
df_train = df.loc[df.index < TEST_START].copy()

df


In [None]:
# Seed first so dataset construction, weight initialisation and shuffling
# all start from the same seeded state.
seed_everything(101)

# --- Hyperparameters ----------------------------------------------------------
# Transformer architecture
d_model = 64
nhead = 8
d_hid = 256
num_layers = 2
dropout = 0.39

# Windowing of the time series
sequence_length = 63
forecast_length = 1

# Optimisation
batch_size = 8
max_epoch = 706

# Model input / output widths
num_inputs = len(FEATURES)
num_outputs = 2  # presumably the number of classes -- confirm against LitTransformer

# --- Data ---------------------------------------------------------------------
train_dataset = SeqToClassDataset(
    df_train,
    FEATURES,
    TARGETS,
    sequence_length,
    forecast_length,
)
# NOTE(review): the "validation" dataset is built from df_test, so the metrics
# reported during fit are computed on the held-out test split.
val_dataset = SeqToClassDataset(
    df_test,
    FEATURES,
    TARGETS,
    sequence_length,
    forecast_length,
)

# Only the training loader shuffles its samples.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# --- Model --------------------------------------------------------------------
model = LitTransformer(
    num_inputs,
    num_outputs,
    d_model,
    nhead,
    d_hid,
    num_layers,
    dropout,
    "classification",
)

# --- Logging ------------------------------------------------------------------
# The run's "version" encodes every hyperparameter so that each configuration
# is logged under its own TensorBoard version. The trailing "-" is part of the
# established naming and is kept as is.
version = (
    f"epoch={max_epoch}-"
    f"batch_size={batch_size}-"
    f"sequence_length={sequence_length}-"
    f"forecast_length={forecast_length}-"
    f"n_layers={num_layers}-"
    f"d_hid={d_hid}-"
    f"d_model={d_model}-"
    f"nhead={nhead}-"
    f"dropout={dropout:.2f}-"
)

# NOTE(review): absolute, machine-specific log directory -- confirm it exists
# on the machine running this notebook.
logger = TensorBoardLogger(
    "G:/ML-Storage/tb_logs_transformer/",
    name="transformer_classification_test",
    version=version,
)

# --- Training -----------------------------------------------------------------
# log_every_n_steps equals the number of batches in train_loader, i.e. metrics
# are logged once per full pass over the training data.
steps_per_epoch = len(train_loader)
trainer = pl.Trainer(
    gpus=1,
    min_epochs=1,
    max_epochs=max_epoch,
    log_every_n_steps=steps_per_epoch,
    logger=logger,
    callbacks=[RichProgressBar()],
)

trainer.fit(model, train_loader, val_loader)
