In [None]:
import logging
import gc

from IPython.display import clear_output

import pandas as pd


import torch
from torch.utils.data import DataLoader

# Use pytorch_lightning rather than lightning.pytorch: Optuna's integration imports
# pytorch_lightning, and mixing in lightning.pytorch produces import-related errors.
import pytorch_lightning as pl
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning import seed_everything

# RichProgressBar uses leave=False by default; I found the TQDM progress bar harder to configure
from pytorch_lightning.callbacks import RichProgressBar

# Silence the INFO-level hardware banner that Lightning prints for every Trainer.
# The loggers are looked up by name instead of through module attributes
# (`pl.utilities.distributed.log`, `pl.accelerators.gpu._log`), so this cell does
# not raise AttributeError on Lightning versions that no longer ship those modules.
# Assumes those modules create their loggers with logging.getLogger(__name__).
for _noisy_logger in (
    "pytorch_lightning.utilities.distributed",
    "pytorch_lightning.accelerators.gpu",
):
    logging.getLogger(_noisy_logger).setLevel(logging.WARNING)

import optuna
from optuna.integration import PyTorchLightningPruningCallback

from dataset.sequence_to_price import SeqToPriceDataset
from dataset.sequence_to_class import SeqToClassDataset


from common.log_scaler import LogScaler

from lstm.lit_rnn import LitRNN


In [None]:
# --- Input data -------------------------------------------------------------
# CSV read by the data-loading cell; INDEX_COL becomes the (datetime) index.
DATA_PATH = "../data/btc.csv"
INDEX_COL = "timestamp"

# --- Chronological split boundaries -----------------------------------------
# Rows with index < VAL_START are used for training; rows in
# [VAL_START, TEST_START) are used for validation.
VAL_START = pd.to_datetime("2022-08-01 00:00:00")
TEST_START = pd.to_datetime("2022-12-20 00:00:00")

# --- Columns ----------------------------------------------------------------
# Column(s) handed to the datasets as the prediction target.
TARGETS = ["market-price"]

# Columns handed to the datasets as model inputs; len(FEATURES) is used as the
# network's input width, so the order and count here matter.
FEATURES = [
    "avg-block-size",
    "avg-confirmation-time",
    "blocks-size",
    "cost-per-transaction",
    "difficulty",
    "estimated-transaction-volume-usd",
    "estimated-transaction-volume",
    "fees-usd-per-transaction",
    "hash-rate",
    "median-confirmation-time",
    "mempool-count",
    "mempool-growth",
    "mempool-size",
    "n-payments-per-block",
    "n-payments",
    "n-transactions-excluding-popular",
    "n-transactions-per-block",
    "n-transactions-total",
    "n-transactions",
    "n-unique-addresses",
    "output-volume",
    "total-bitcoins",
    "trade-volume",
    "transaction-fees-usd",
    "transaction-fees",
    "utxo-count",
]


In [None]:
# Load the table and turn the index column into a DatetimeIndex.
df = pd.read_csv(DATA_PATH, index_col=INDEX_COL)
df.index = pd.to_datetime(df.index)

# Drop the first and the last row, then fill missing values with pandas'
# default interpolation.
df = df.iloc[1:-1].interpolate()

# Scale every column with the project's LogScaler and discard rows that still
# contain NaN afterwards.
# NOTE(review): the scaler is fitted on the whole frame, i.e. before the
# train/validation split below — confirm this does not leak validation data.
scaler = LogScaler(df)
df = scaler.fit_transform(df).copy().dropna()

# Chronological split: training rows come before VAL_START, validation rows
# lie in [VAL_START, TEST_START).
df_train = df[df.index < VAL_START].copy()
df_val = df[(df.index >= VAL_START) & (df.index < TEST_START)].copy()

df.head()


In [None]:
def objective(trial):
    """Train one LitRNN classifier with trial-sampled hyperparameters and score it.

    Hyperparameters (layer count, epoch budget, batch size, hidden width,
    dropout and input sequence length) are drawn from ``trial``. The model is
    fitted on ``df_train`` and validated on ``df_val``; both are module-level
    frames built in the data-preparation cell.

    Args:
        trial: Optuna trial used to sample hyperparameters; it is also handed
            to ``PyTorchLightningPruningCallback`` monitoring ``val_acc_epoch``.

    Returns:
        float: the ``val_acc_epoch`` entry of ``trainer.callback_metrics``
        after fitting.

    Raises:
        Whatever ``trainer.fit`` raises is propagated unchanged (this includes
        the pruning signal of the Optuna callback); the memory cleanup still
        runs in that case.
    """
    # Clear the previous trial's progress bar so the cell output does not pile up.
    clear_output(wait=True)

    num_layers = trial.suggest_int("num_layers", 2, 8)
    max_epoch = trial.suggest_int("max_epoch", 5, 2000)
    batch_size = trial.suggest_int("batch_size", 16, 128)
    d_hid = trial.suggest_int("d_hid", 2, 256)
    dropout = trial.suggest_float("dropout", 0.2, 0.8)
    sequence_length = trial.suggest_int("sequence_length", 5, 96)
    forecast_length = 1

    num_inputs = len(FEATURES)
    num_outputs = 2

    # Same seed for every trial, set before datasets, loaders and model are built.
    seed_everything(101)

    train_dataset = SeqToClassDataset(
        df_train, FEATURES, TARGETS, sequence_length, forecast_length
    )
    val_dataset = SeqToClassDataset(
        df_val, FEATURES, TARGETS, sequence_length, forecast_length
    )

    # Shuffle only the training batches; validation keeps dataset order.
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    model = LitRNN(
        num_inputs, d_hid, num_outputs, num_layers, dropout, "classification"
    )

    # TensorBoard run name encoding the sampled hyperparameters.
    version = (
        f"epoch={max_epoch}-"
        f"batch_size={batch_size}-"
        f"sequence_length={sequence_length}-"
        f"n_layers={num_layers}-"
        f"d_hid={d_hid}-"
        f"dropout={dropout:.2f}"
    )

    logger = TensorBoardLogger(
        "G:/ML-storage/tb_logs/", name="lstm_classification_final", version=version
    )
    trainer = pl.Trainer(
        gpus=1,
        min_epochs=1,
        max_epochs=max_epoch,
        # One logging step per pass over the training loader.
        log_every_n_steps=len(train_loader),
        logger=logger,
        callbacks=[
            PyTorchLightningPruningCallback(trial, monitor="val_acc_epoch"),
            RichProgressBar(),
        ],
    )

    try:
        trainer.fit(model, train_loader, val_loader)
        # Read the metric before the trainer is dropped below.
        score = trainer.callback_metrics["val_acc_epoch"].item()
    finally:
        # Always release per-trial objects, including when fit exits with an
        # exception (e.g. a pruned trial). The trainer still references the
        # fitted model, so it is dropped together with the model and loaders
        # before collecting garbage and emptying the CUDA cache.
        del trainer, model, train_loader, val_loader
        gc.collect()
        torch.cuda.empty_cache()

    return score


In [None]:
# MedianPruner's n_startup_trials / n_warmup_steps are counts, so pass integers
# (previously the float literals 1e2 and 5e4).
# NOTE(review): objective() caps training at max_epoch <= 2000; if the pruning
# callback reports one step per epoch, a 50_000-step warm-up means no trial is
# ever pruned — confirm this is intended.
pruner = optuna.pruners.MedianPruner(n_startup_trials=100, n_warmup_steps=50_000)

# The study is persisted in a local SQLite file and resumed if it already exists,
# so re-running this cell adds trials to the same study.
study = optuna.create_study(
    direction="maximize",
    pruner=pruner,
    storage="sqlite:///lstm_classification_final_db.sqlite3",
    study_name="lstm_classification",
    load_if_exists=True,
)
study.optimize(objective, n_trials=400)

# len(study.trials) counts every trial stored in the study.
print("Number of finished trials:", len(study.trials))

print("Best trial:")
trial = study.best_trial

print("  Value: ", trial.value)

print("  Params: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")
