In [2]:
%matplotlib inline

# 1) Wipe out all Python variables
%reset -f
# 2) Force Python’s garbage collector to run
import gc
gc.collect()

import os
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["OPENBLAS_NUM_THREADS"] = "1"
os.environ["MKL_NUM_THREADS"] = "1"
os.environ["BLIS_NUM_THREADS"] = "1"
os.environ["VECLIB_MAXIMUM_THREADS"] = "1"
os.environ["NUMEXPR_NUM_THREADS"] = "1"
os.environ["OMP_WAIT_POLICY"] = "PASSIVE"

import importlib
from libs import plots, params, models
importlib.reload(plots)
importlib.reload(params)
importlib.reload(models)


ValueError: module functions cannot set METH_CLASS or METH_STATIC

In [None]:
# Turn off interactive plotting globally (we'll manage our own display).
# NOTE(review): the first cell runs `%matplotlib inline`; this call switches the
# backend to Agg afterwards. The live progress plot later in the notebook is
# pushed explicitly through `display(fig, display_id=True)` — confirm that it
# still renders as intended with this backend combination.
import matplotlib
matplotlib.use("Agg")  # safe, headless-friendly
import matplotlib.pyplot as plt
# Disable pyplot's interactive mode so figures are not drawn automatically.
plt.ioff()

import json
import numpy as np
import pandas as pd
import datetime as dt
from pathlib import Path

import torch
from torch import nn
from torch.optim import AdamW
from torch.optim.lr_scheduler import ReduceLROnPlateau, CosineAnnealingWarmRestarts
from torch.cuda.amp import GradScaler, autocast

import optuna
from optuna.pruners import MedianPruner
from optuna.exceptions import TrialPruned
from optuna.importance import get_param_importances

from tqdm.auto import tqdm


In [None]:
# Pull the run configuration out of the shared `params` module
features_cols = params.features_cols_tick
label_col = params.label_col
device = params.device

# Load the feature table; the first CSV column becomes a parsed datetime index
df_feat = pd.read_csv(params.feat_csv, index_col=0, parse_dates=True)

# Keep the model features plus the 'signal', 'ask' and 'bid' columns
df_features = df_feat.loc[:, features_cols + ['signal', 'ask', 'bid']]
df_features


In [None]:
# Candidate look-back windows: multiples of LOOK_BACK_STEP_MIN minutes that fit
# within half the interval between the day's usual first bar and
# params.sess_start.
LOOK_BACK_STEP_MIN = 30

# Most common time-of-day of the first row of each calendar day
first_time = (
    df_features.index
        .to_series()
        .groupby(df_features.index.normalize())
        .min()
        .dt.time
        .mode()[0]
)

# convert both times to minutes since midnight (seconds are ignored)
fm = first_time.hour * 60 + first_time.minute
sm = params.sess_start.hour * 60 + params.sess_start.minute

# number of full steps that fit in half the gap, e.g. 330 // (2 * 30) = 5
n_steps    = int((sm - fm) // (2 * LOOK_BACK_STEP_MIN))
look_backs = [LOOK_BACK_STEP_MIN * i for i in range(1, n_steps + 1)]

# Fail here with a readable message instead of letting an empty choice list
# reach trial.suggest_categorical inside the first Optuna trial.
if not look_backs:
    raise ValueError(
        f"No look_back candidates: first bar at {first_time}, sess_start at "
        f"{params.sess_start}; half the gap is shorter than "
        f"{LOOK_BACK_STEP_MIN} minutes."
    )

look_backs


In [None]:
# ──────────────────────────────────────────────────────────────────────────────
# Optuna objective definition
# ──────────────────────────────────────────────────────────────────────────────

def objective(trial):
    """Optuna objective: train one DualMemoryLSTM configuration and score it.

    Samples dropout, optimizer and look-back hyperparameters from `trial`,
    builds the model, optimizer and schedulers, regenerates the windowed
    tensors for the sampled look-back, splits them chronologically, and runs
    `models.custom_stateful_training_loop`.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial handle used only to sample hyperparameters; it is not passed on
        to the training loop.

    Returns
    -------
    The value returned by `models.custom_stateful_training_loop`, which the
    study minimises (used here as the best validation RMSE).

    Notes
    -----
    All large per-trial objects are released in a `finally` block, so memory
    is reclaimed even when model construction, data preparation or training
    raises.
    """
    # The order of the suggest_* calls is kept fixed: it determines both the
    # sampler's draw order and the key order of the printed dict.
    hp = {
        # ── Architecture ────────────────────────────────────────────────
        "look_back"    : trial.suggest_categorical("look_back", look_backs),
        "DROPOUT_SHORT": trial.suggest_float("DROPOUT_SHORT", 0.05, 0.35),
        "DROPOUT_LONG":  trial.suggest_float("DROPOUT_LONG",  0.05, 0.35),
        "ATT_DROPOUT":   trial.suggest_float("ATT_DROPOUT",   0.05, 0.35),

        # ── Optimizer & Scheduler ──────────────────────────────────────
        "INITIAL_LR":    trial.suggest_float("INITIAL_LR",    1e-4, 1e-3),
        "ETA_MIN":       trial.suggest_float("ETA_MIN",       1e-6, 1e-5),
        "WEIGHT_DECAY":  trial.suggest_float("WEIGHT_DECAY",  1e-6, 1e-4),
        "CLIPNORM":      trial.suggest_float("CLIPNORM",      0.5, 2),
    }

    print(f"\n▶ Trial {trial.number} starting with:\n{hp}\n")

    try:
        # Build model (unit counts and head count are fixed in params.hparams)
        model = models.DualMemoryLSTM(
            n_feats       = len(features_cols),
            short_units   = params.hparams["SHORT_UNITS"],
            long_units    = params.hparams["LONG_UNITS"],
            dropout_short = hp["DROPOUT_SHORT"],
            dropout_long  = hp["DROPOUT_LONG"],
            att_heads     = params.hparams["ATT_HEADS"],
            att_drop      = hp["ATT_DROPOUT"]
        ).to(device)

        # Build optimizer + schedulers + scaler. The third returned item is
        # discarded: the cosine scheduler is rebuilt below so that it uses
        # the sampled ETA_MIN.
        optimizer, plateau_sched, _, scaler, clipnorm = \
            models.make_optimizer_and_scheduler(
                model        = model,
                initial_lr   = hp["INITIAL_LR"],
                weight_decay = hp["WEIGHT_DECAY"],
                clipnorm     = hp["CLIPNORM"]
            )

        cosine_sched = CosineAnnealingWarmRestarts(
            optimizer,
            T_0=params.hparams['T_0'],
            T_mult=params.hparams['T_MULT'],
            eta_min=hp['ETA_MIN']
        )

        # Build the LSTM input tensors for this trial's look_back
        X, y, raw_close, raw_bid, raw_ask = models.build_lstm_tensors(
            df            = df_features,
            look_back     = hp["look_back"],
            features_cols = features_cols,
            label_col     = label_col,
            sess_start    = False # if we want the predictions not to start from sess_start, but from sess_start_pred
        )
        print("Full tensors Shapes:")
        print("  X         =", X.shape,    "(samples, look_back, features)")
        print("  y         =", y.shape,    "(samples,)")

        # Split the full sliding-window dataset (X, y, raw_*) into train,
        # validation and test sets by calendar day
        (X_tr, y_tr), \
        (X_val, y_val), \
        (X_te, y_te, raw_close_te, raw_bid_te, raw_ask_te), \
        samples_per_day, day_id_tr, day_id_val, day_id_te = models.chronological_split(
            X, y, raw_close, raw_bid, raw_ask, df_features,
            look_back   = hp["look_back"],
            train_prop  = params.train_prop,
            val_prop    = params.val_prop,
            train_batch = params.hparams['TRAIN_BATCH'],
            sess_start  = False # if we want the predictions not to start from sess_start, but from sess_start_pred
        )
        print("Split tensors Shapes:")
        print("  X_tr =", X_tr.shape)
        print("  X_val =", X_val.shape)
        print("  X_te =", X_te.shape)

        # Build DataLoaders over calendar days
        train_loader, val_loader, test_loader = models.split_to_day_datasets(
            # Training split arrays (from chronological_split)
            X_tr, y_tr, day_id_tr,
            # Validation split arrays
            X_val, y_val, day_id_val,
            # Test split arrays + raw prices for post-tracking
            X_te, y_te, day_id_te, raw_close_te, raw_bid_te, raw_ask_te,
            # Original minute-bar DataFrame for weekday mapping
            df=df_features,
            train_batch=params.hparams['TRAIN_BATCH'],
            train_workers=params.hparams['NUM_WORKERS'],
            train_prefetch_factor=params.hparams['TRAIN_PREFETCH_FACTOR']
        )

        # Report the training-set size and the baseline validation RMSE
        n_train_days = len(train_loader.dataset)  # dataset length = # unique days
        print(f"Training sees {n_train_days} calendar days per epoch\n")
        baseline_val_rmse = models.naive_rmse(val_loader)
        print(f"Baseline (zero‐forecast) RMSE on validation = {baseline_val_rmse:.6f}")

        # Run training & return best validation RMSE
        best_rmse = models.custom_stateful_training_loop(
            model               = model,
            optimizer           = optimizer,
            cosine_sched        = cosine_sched,
            plateau_sched       = plateau_sched,
            scaler              = scaler,
            train_loader        = train_loader,
            val_loader          = val_loader,
            max_epochs          = params.hparams["MAX_EPOCHS"],
            early_stop_patience = params.hparams["EARLY_STOP_PATIENCE"],
            baseline_val_rmse   = baseline_val_rmse,
            clipnorm            = clipnorm,
            device              = device,
        )
        return best_rmse

    finally:
        # Drop every large per-trial reference. Rebinding to None (rather than
        # `del`) is safe even for names that were never assigned because an
        # earlier step raised. It also lets the objects be freed when a
        # traceback still keeps this frame alive.
        model = optimizer = plateau_sched = cosine_sched = scaler = clipnorm = None
        X = y = raw_close = raw_bid = raw_ask = None
        X_tr = y_tr = X_val = y_val = X_te = y_te = None
        raw_close_te = raw_bid_te = raw_ask_te = None
        samples_per_day = day_id_tr = day_id_val = day_id_te = None
        train_loader = val_loader = test_loader = None

        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()



In [None]:
# Empty figure holding a single, initially empty line
fig, ax = plt.subplots(figsize=(7,3))
(line,) = ax.plot([], [], "bo-")
ax.set_xlabel("Trial #")
ax.set_ylabel("Objective")
ax.set_title("Optuna optimization progress")
ax.grid(True)

# Show the figure once and keep the display handle for later updates;
# the pyplot-side figure is then closed.
handle = display(fig, display_id=True)
plt.close(fig)

# Callback from plots.py bound to this figure, axes, line and display handle
live_cb = plots.make_live_plot_callback(fig, ax, line, handle)

In [None]:
# ──────────────────────────────────────────────────────────────────────────────
#  Create Optuna study and run optimization
# ──────────────────────────────────────────────────────────────────────────────

# `load_if_exists=True` only has an effect when a study name is given: without
# one, Optuna generates a fresh unique name on every run, so the SQLite file
# just accumulates unrelated studies and nothing is ever resumed.
# With a fixed name, re-running this cell continues the stored study and adds
# `n_trials` more trials to it.
# NOTE(review): if the search space changes between runs (e.g. a different
# `look_backs` list), use a new study name or a new database file.
STUDY_NAME = f"{params.ticker}_optuna_model_hpars"

# NOTE(review): `objective` in this notebook never calls trial.report() or
# trial.should_prune() and does not pass `trial` to the training loop, so the
# MedianPruner receives no intermediate values to act on — confirm whether
# pruning is meant to be wired in.
study = optuna.create_study(
    study_name=STUDY_NAME,
    storage="sqlite:///optuna_study.db",    # SQLite-backed storage: each trial result is written out as it finishes
    load_if_exists=True,
    direction="minimize",
    pruner=MedianPruner(n_startup_trials=6, n_warmup_steps=12),
)

study.optimize(
    objective,
    n_trials = 100,
    n_jobs   = 1,
    callbacks=[live_cb, plots.cleanup_callback],
)

plt.close('all')   # safe here; the final image remains displayed in the notebook output
gc.collect()       # optional extra sweep



In [None]:
# ──────────────────────────────────────────────────────────────────────────────
#  Print out the best hyperparameters & result
# ──────────────────────────────────────────────────────────────────────────────
print("Best hyperparameters:", study.best_params)
print("Best validation RMSE:", study.best_value)

# ──────────────────────────────────────────────────────────────────────────────
#  Compute and print parameter importances
# ──────────────────────────────────────────────────────────────────────────────
imps = get_param_importances(study)
print("\nHyperparameter importances (higher ⇒ more impact on RMSE):")
for name, score in sorted(imps.items(), key=lambda x: x[1], reverse=True):
    print(f"  {name:20s} : {score:.3f}")

# ──────────────────────────────────────────────────────────────────────────────
#  Dump study results to JSON
# ──────────────────────────────────────────────────────────────────────────────
# 1) Restrict the feature frame to the regular session.
#    The previous code read from `df`, which no cell in this notebook defines;
#    `df_features` is the frame the study was trained on.
session_df = df_features.between_time(params.regular_start,
                                      params.regular_end)

# 2) Derive the trading-day boundaries
first_day = session_df.index.normalize().min()
last_day  = session_df.index.normalize().max()

# 3) Build the output file name: <ticker>_<first day>-<last day>_optuna_model_hpars.json
start_date = first_day.strftime("%Y%m%d")
end_date   = last_day.strftime("%Y%m%d")
file_name  = f"{params.ticker}_{start_date}-{end_date}_optuna_model_hpars.json"

# 4) Resolve the output folder. `results_folder` was never defined in this
#    notebook, so the original os.path.join raised NameError.
#    NOTE(review): assumes the folder may be configured as
#    `params.results_folder`; falls back to the working directory (where the
#    Optuna SQLite file is also written) — confirm the intended location.
results_folder = Path(getattr(params, "results_folder", "."))
results_folder.mkdir(parents=True, exist_ok=True)
file_path = results_folder / file_name

# ------------------------------------------------------------------
# Dump study results (including importances)
# ------------------------------------------------------------------
with open(file_path, "w", encoding="utf-8") as f:
    json.dump(
        {
            "best_params": study.best_params,
            "best_value" : study.best_value,
            "importances": dict(imps),
            "trials": [
                {"number": t.number, "value": t.value, "params": t.params,
                 "state": t.state.name}
                for t in study.trials
            ],
        },
        f,
        indent=4,
    )

print(f"\nOptuna results (and importances) saved to: {file_path}")
