In [1]:
import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

import pandas as pd
import torch
from datetime import datetime, timezone
import logging

logging.basicConfig(
    level=logging.INFO,  # Set the logging level
    format='%(asctime)s - %(levelname)s - %(message)s',  # Format for the log messages
    handlers=[
        logging.StreamHandler()  # Log to the console
    ]
)

%reload_ext autoreload
%autoreload 2
from data.raw.retrievers.alpaca_markets_retriever import AlpacaMarketsRetriever
from config.constants import *
from data.processed.dataset_creation import DatasetCreator
from data.processed.indicators import *
from data.processed.targets import Balanced3ClassClassification
from data.processed.normalization import ZScoreOverWindowNormalizer, ZScoreNormalizer, MinMaxNormalizer
from data.processed.dataset_pytorch import DatasetPytorch
from modeling.trainer import Trainer
from modeling.evaluate import evaluate_lgb_regressor, evaluate_torch_regressor, evaluate_torch_regressor_multiasset
# from observability.mlflow_integration import log_experiment

from modeling.rl.environment import PortfolioEnvironment
from modeling.rl.state import State
from modeling.rl.agent import RlAgent
from modeling.rl.algorithms.policy_gradient import PolicyGradient
from modeling.rl.actors.actor import RlActor, FullyConnectedBackend, TransformerBackend
from modeling.rl.actors.signal_predictor_actor import SignalPredictorActor
from modeling.rl.actors.high_energy_low_friction_actor import HighEnergyLowFrictionActor
from modeling.rl.actors.xsmom_actor import XSMomActor
from modeling.rl.actors.tsmom_actor import TSMomActor
from modeling.rl.actors.blsw_actor import BLSWActor
from modeling.rl.trajectory_dataset import TrajectoryDataset
from modeling.rl.metrics import MetricsCalculator, DEFAULT_METRICS
from modeling.rl.reward import EstimatedReturnReward
from modeling.rl.loss import SumLogReturnLoss
from modeling.rl.visualization.wealth_plot import plot_cumulative_wealth
from modeling.rl.visualization.position_plot import plot_position_heatmap
from config.experiments.cur_experiment import config

torch.backends.cudnn.benchmark = config.train_config.cudnn_benchmark

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device


device(type='cuda')

In [2]:
retriever = AlpacaMarketsRetriever(download_from_gdrive=True)

retrieval_result = retriever.bars_with_quotes(
    symbol_or_symbols=config.data_config.symbol_or_symbols,
    start=config.data_config.start,
    end=config.data_config.end)

Downloading...
From (original): https://drive.google.com/uc?id=1oE69lvgomUzIqJHCOJ4N1g6opWuv1coW
From (redirected): https://drive.google.com/uc?id=1oE69lvgomUzIqJHCOJ4N1g6opWuv1coW&confirm=t&uuid=e0188477-d002-4a6b-af33-bd71e868eb89
To: /workspace/intraday-portfolio-management/data/raw/alpaca/bars_with_quotes/1Min_2024-06-01-2025-06-01_AAPL+AMD+BABA+BITU+C+CSCO+DAL+DIA+GLD+GOOG+IJR+MARA+MRVL+MU+NEE+NKE+NVDA+ON+PLTR+PYPL+QLD+QQQ+QQQM+R.pkl
100%|██████████| 587M/587M [00:03<00:00, 167MB/s]  
Downloading...
From: https://drive.google.com/uc?id=1fBIwQMGOf-cV5IN-psvWqSV2I3MvPum_
To: /workspace/intraday-portfolio-management/modeling/checkpoints/best_model.pth
100%|██████████| 25.4M/25.4M [00:00<00:00, 103MB/s] 


In [3]:
dataset_creator = DatasetCreator(
    features=config.data_config.features,
    target=config.data_config.target,
    normalizer=config.data_config.normalizer,
    missing_values_handler=config.data_config.missing_values_handler,
    train_set_last_date=config.data_config.train_set_last_date, 
    in_seq_len=config.data_config.in_seq_len,
    multi_asset_prediction=config.data_config.multi_asset_prediction,
)

X_train, y_train, next_return_train, spread_train, volatility_train, X_test, y_test, next_return_test, spread_test, volatility_test = dataset_creator.create_dataset_numpy(retrieval_result)
X_train.shape, y_train.shape, next_return_train.shape, spread_train.shape, volatility_train.shape, X_test.shape, y_test.shape, next_return_test.shape, spread_test.shape, volatility_test.shape

2025-08-11 16:14:09,565 - INFO - Processing AAPL …
2025-08-11 16:14:09,820 - INFO - Imputing 496 NaN rows out of 97359 with forward fill..
2025-08-11 16:14:10,046 - INFO - Spread has 0 NaNs
2025-08-11 16:14:10,095 - INFO - Imputing 39 NaN rows with 0.5 sentinel value
2025-08-11 16:14:10,103 - INFO - Processing AMD …
2025-08-11 16:14:10,325 - INFO - Imputing 214 NaN rows out of 97359 with forward fill..
2025-08-11 16:14:10,536 - INFO - Spread has 0 NaNs
2025-08-11 16:14:10,584 - INFO - Imputing 39 NaN rows with 0.5 sentinel value
2025-08-11 16:14:10,592 - INFO - Processing BABA …
2025-08-11 16:14:10,920 - INFO - Imputing 874 NaN rows out of 97359 with forward fill..
2025-08-11 16:14:11,132 - INFO - Spread has 0 NaNs
2025-08-11 16:14:11,180 - INFO - Imputing 39 NaN rows with 0.5 sentinel value
2025-08-11 16:14:11,188 - INFO - Processing BITU …
2025-08-11 16:14:11,387 - INFO - Imputing 6493 NaN rows out of 97359 with forward fill..
2025-08-11 16:14:11,602 - INFO - Spread has 0 NaNs
2025-0

((79741, 50, 60, 15),
 (79741, 50),
 (79741, 50),
 (79741, 50),
 (79741, 50),
 (7291, 50, 60, 15),
 (7291, 50),
 (7291, 50),
 (7291, 50),
 (7291, 50))

In [4]:
# X_test, y_test, next_return_test, spread_test, volatility_test = X_test[:7000], y_test[:7000], next_return_test[:7000], spread_test[:7000], volatility_test[:7000]

In [5]:
y_train.mean(), y_test.mean()

(0.49983266, 0.50164175)

In [6]:
train_loader = DatasetPytorch(X_train, y_train, learning_task='regression').as_dataloader(
    batch_size=config.train_config.batch_size,
    shuffle=config.train_config.shuffle,
    num_workers=config.train_config.num_workers,
    prefetch_factor=config.train_config.prefetch_factor,
    pin_memory=config.train_config.pin_memory,
    persistent_workers=config.train_config.persistent_workers,
    drop_last=config.train_config.drop_last
)
test_loader = DatasetPytorch(X_test, y_test, learning_task='regression').as_dataloader(
    batch_size=config.train_config.batch_size,
    shuffle=config.train_config.shuffle,
    num_workers=config.train_config.num_workers,
    prefetch_factor=config.train_config.prefetch_factor,
    pin_memory=config.train_config.pin_memory,
    persistent_workers=config.train_config.persistent_workers,
    drop_last=config.train_config.drop_last
)

In [7]:
model = config.model_config.model
model

TemporalSpatial(
  (asset_embed): Embedding(50, 16)
  (asset_proj): Linear(in_features=16, out_features=256, bias=False)
  (lstm): LSTM(15, 128, num_layers=2, batch_first=True, dropout=0.2, bidirectional=True)
  (spatial_attn): MultiheadAttention(
    (out_proj): NonDynamicallyQuantizableLinear(in_features=256, out_features=256, bias=True)
  )
  (fc): Linear(in_features=256, out_features=1, bias=True)
  (norm): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
  (dropout): Dropout(p=0.2, inplace=False)
)

In [8]:
config

ExperimentConfig(data_config=DataConfig(symbol_or_symbols=['AAPL', 'AMD', 'BABA', 'BITU', 'C', 'CSCO', 'DAL', 'DIA', 'GLD', 'GOOG', 'IJR', 'MARA', 'MRVL', 'MU', 'NEE', 'NKE', 'NVDA', 'ON', 'PLTR', 'PYPL', 'QLD', 'QQQ', 'QQQM', 'RKLB', 'RSP', 'SMCI', 'SMH', 'SOXL', 'SOXX', 'SPXL', 'SPY', 'TMF', 'TNA', 'TQQQ', 'TSLA', 'UBER', 'UDOW', 'UPRO', 'VOO', 'WFC', 'XBI', 'XLC', 'XLE', 'XLI', 'XLK', 'XLU', 'XLV', 'XLY', 'XOM', 'XRT'], start=datetime.datetime(2024, 6, 1, 0, 0), end=datetime.datetime(2025, 6, 1, 0, 0), features={'log_ret': <function <lambda> at 0x7f96b70f6ca0>, 'hl_range': <function <lambda> at 0x7f96b70f6de0>, 'close_open': <function <lambda> at 0x7f96b6f08a40>, 'vol_delta': <function <lambda> at 0x7f96b6f08ae0>, 'EMA_fast': <data.processed.indicators.EMA object at 0x7f96b70bda90>, 'EMA_slow': <data.processed.indicators.EMA object at 0x7f96b709cb90>, 'RSI2': <data.processed.indicators.RSI object at 0x7f96b704ff50>, 'RSI6': <data.processed.indicators.RSI object at 0x7f96c720af90>, '

In [9]:
trainer = Trainer(
    model=model,
    train_loader=train_loader,
    val_loader=test_loader,
    loss_fn=config.train_config.loss_fn,
    optimizer=config.train_config.optimizer,
    scheduler=config.train_config.scheduler,
    num_epochs=config.train_config.num_epochs,
    device=config.train_config.device,
    metrics=config.train_config.metrics,
    save_path=config.train_config.save_path
)

In [10]:
# 0.1021
# 0.3199

In [11]:
model, history = trainer.train()

2025-08-11 16:14:37,973 - INFO - Epoch 1/20
2025-08-11 16:15:24,477 - INFO - Train Loss: 0.1358        
2025-08-11 16:15:24,477 - INFO - Train Rmse: 0.3670
2025-08-11 16:15:24,477 - INFO - Val   Loss: 0.1219
2025-08-11 16:15:24,477 - INFO - Val   Rmse: 0.3490
2025-08-11 16:15:24,477 - INFO - 
2025-08-11 16:15:24,479 - INFO - Epoch 2/20
2025-08-11 16:16:05,813 - INFO - Train Loss: 0.1253        
2025-08-11 16:16:05,814 - INFO - Train Rmse: 0.3539
2025-08-11 16:16:05,814 - INFO - Val   Loss: 0.1171
2025-08-11 16:16:05,814 - INFO - Val   Rmse: 0.3421
2025-08-11 16:16:05,815 - INFO - 
2025-08-11 16:16:05,816 - INFO - Epoch 3/20
2025-08-11 16:16:47,121 - INFO - Train Loss: 0.1240        
2025-08-11 16:16:47,121 - INFO - Train Rmse: 0.3521
2025-08-11 16:16:47,122 - INFO - Val   Loss: 0.1160
2025-08-11 16:16:47,122 - INFO - Val   Rmse: 0.3405
2025-08-11 16:16:47,122 - INFO - 
2025-08-11 16:16:47,123 - INFO - Epoch 4/20
2025-08-11 16:17:28,418 - INFO - Train Loss: 0.1235        
2025-08-11 16:

In [12]:
train_trajectory_loader = TrajectoryDataset(X_train, next_return_train, spread_train, volatility_train, trajectory_length=16).as_dataloader(
    batch_size=8, 
    shuffle=True,
    num_workers=8,
    prefetch_factor=4,
    pin_memory=True,
    persistent_workers=True,
    drop_last=True,
)
val_trajectory_loader = TrajectoryDataset(X_test, next_return_test, spread_test, volatility_test, trajectory_length=16).as_dataloader(
    batch_size=8, 
    shuffle=False,
    num_workers=8,
    prefetch_factor=4,
    pin_memory=True,
    persistent_workers=True,
    drop_last=True,
)

In [22]:
env = PortfolioEnvironment(
    reward_function=EstimatedReturnReward(fee=0.0, spread_multiplier=0.99),
)

backend = FullyConnectedBackend(
    n_assets=len(config.data_config.symbol_or_symbols),
    hidden_dim=128,
    num_layers=2, 
    dropout=0.1,
    use_layer_norm=False,
)

actor = RlActor(
    model, 
    backend,
    n_assets=len(config.data_config.symbol_or_symbols),
    train_signal_predictor=False, 
    exploration_eps=0.0
).to(device)

signal_predictor_actor = SignalPredictorActor(
    model, 
    trade_asset_count=1,
    train_signal_predictor=False
).to(device)

rl_agent = RlAgent(
    actor, 
    env,
    single_action_per_trajectory=False
)

metrics_calculator = MetricsCalculator(
    metrics=DEFAULT_METRICS
)

policy_gradient = PolicyGradient(
    rl_agent, 
    train_trajectory_loader, 
    val_trajectory_loader, 
    metrics_calculator=metrics_calculator,
    optimizer=torch.optim.AdamW(
        [p for p in actor.parameters() if p.requires_grad], 
        lr=1e-3,
        weight_decay=1e-5,
        amsgrad=True),
    scheduler=None,
    loss_fn=SumLogReturnLoss(use_baseline=False),
    num_epochs=10,
    device=device
)

In [23]:
epoch_loss, realized_returns_signal_predictor, actions_signal_predictor = policy_gradient.evaluate(signal_predictor_actor)

2025-08-11 16:32:01,912 - INFO - [PolicyGradient] [VAL] Epoch 0/10 — CumulativeReturn: 0.4364, MeanReturnPercentage: 0.0054


[PolicyGradient] [VAL] Epoch 0/10 — Loss: -0.0453


In [17]:
plot_cumulative_wealth(
    returns_dict={
        'Signal Predictor': realized_returns_signal_predictor,
    }, 
    start_time=config.data_config.train_set_last_date, 
    end_time=config.data_config.end
)

TypeError: can't subtract offset-naive and offset-aware datetimes

In [24]:
import copy  # Local import to avoid polluting global namespace unnecessarily
state_dict = (
    model.module.state_dict()
        if isinstance(model, torch.nn.DataParallel)
    else model.state_dict()
)

# Keep a local copy of the best weights so we can return the best model
# after training finishes, without needing to reload from disk.
best_model_state = copy.deepcopy(state_dict)

# Persist to disk if a save_path was provided
torch.save(state_dict, "best_model.pth")

In [20]:
log_experiment(
    config=config, 
    model=model, 
    history=history,
    input_data_sample=next(iter(train_loader))[0].to(trainer.device))

Registered model 'LSTM Default' already exists. Creating a new version of this model...
2025/06/26 15:35:14 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: LSTM Default, version 10


🏃 View run gentle-loon-699 at: http://127.0.0.1:8080/#/experiments/439216085822475480/runs/54deb1104660468d9ffb4e7e278e9cfb
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/439216085822475480


Created version '10' of model 'LSTM Default'.


In [10]:
evaluate_lgb_regressor(X_train, y_train, X_test, y_test, next_return_test)



[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000873 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 9435
[LightGBM] [Info] Number of data points in the train set: 7371, number of used features: 37
[LightGBM] [Info] Start training from score 0.497863
Train rmse: 0.26411260601695974, Test rmse: 0.2684210886033184, Baseline rmse: 0.2599985897541046
Expected return: 0.00010183148393891163, Baseline return: 2.569958041931386e-06, Max possible return 0.00048079571570269763


