In [1]:
import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

import pandas as pd
import torch
from datetime import datetime, timezone
import logging

logging.basicConfig(
    level=logging.INFO,  # Set the logging level
    format='%(asctime)s - %(levelname)s - %(message)s',  # Format for the log messages
    handlers=[
        logging.StreamHandler()  # Log to the console
    ]
)

%reload_ext autoreload
%autoreload 2
from data.raw.retrievers.alpaca_markets_retriever import AlpacaMarketsRetriever
from config.constants import *
from data.processed.dataset_creation import DatasetCreator
from data.processed.indicators import *
from data.processed.targets import Balanced3ClassClassification
from data.processed.normalization import ZScoreOverWindowNormalizer, ZScoreNormalizer, MinMaxNormalizer
from data.processed.dataset_pytorch import DatasetPytorch
from modeling.trainer import Trainer
from modeling.evaluate import evaluate_lgb_regressor, evaluate_torch_regressor, evaluate_torch_regressor_multiasset
from observability.mlflow_integration import log_experiment

from modeling.rl.environment import PortfolioEnvironment
from modeling.rl.state import State
from modeling.rl.agent import RlAgent
from modeling.rl.algorithms.policy_gradient import PolicyGradient
from modeling.rl.actors.actor import RlActor
from modeling.rl.trajectory_dataset import TrajectoryDataset
from modeling.rl.metrics import MetricsCalculator, DEFAULT_METRICS
from modeling.rl.reward import estimated_return_reward
from modeling.rl.loss import log_cumulative_trajectory_return_loss

from config.experiments.cur_experiment import config

torch.backends.cudnn.benchmark = config.train_config.cudnn_benchmark


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
retriever = AlpacaMarketsRetriever(download_from_gdrive=False)

retrieval_result = retriever.bars_with_quotes(
    symbol_or_symbols=config.data_config.symbol_or_symbols, 
    start=config.data_config.start, 
    end=config.data_config.end)

In [3]:
dataset_creator = DatasetCreator(
    features=config.data_config.features,
    target=config.data_config.target,
    normalizer=config.data_config.normalizer,
    missing_values_handler=config.data_config.missing_values_handler,
    train_set_last_date=config.data_config.train_set_last_date, 
    in_seq_len=config.data_config.in_seq_len,
    multi_asset_prediction=config.data_config.multi_asset_prediction,
)

X_train, y_train, next_return_train, spread_train, X_test, y_test, next_return_test, spread_test = dataset_creator.create_dataset_numpy(retrieval_result)
X_train.shape, y_train.shape, next_return_train.shape, spread_train.shape, X_test.shape, y_test.shape, next_return_test.shape, spread_test.shape

2025-07-23 15:51:54,632 - INFO - Processing AAPL …
2025-07-23 15:51:55,025 - INFO - Imputing 496 NaN rows out of 97359 with forward fill..
2025-07-23 15:51:55,386 - INFO - Imputing 39 NaN rows with 0.5 sentinel value
2025-07-23 15:51:55,407 - INFO - Processing AMD …
2025-07-23 15:51:55,766 - INFO - Imputing 214 NaN rows out of 97359 with forward fill..
2025-07-23 15:51:56,103 - INFO - Imputing 39 NaN rows with 0.5 sentinel value
2025-07-23 15:51:56,123 - INFO - Processing BABA …
2025-07-23 15:51:56,733 - INFO - Imputing 874 NaN rows out of 97359 with forward fill..
2025-07-23 15:51:57,085 - INFO - Imputing 39 NaN rows with 0.5 sentinel value
2025-07-23 15:51:57,107 - INFO - Processing BITU …
2025-07-23 15:51:57,601 - INFO - Imputing 6493 NaN rows out of 97359 with forward fill..
2025-07-23 15:51:57,953 - INFO - Imputing 39 NaN rows with 0.5 sentinel value
2025-07-23 15:51:57,973 - INFO - Processing CSCO …
2025-07-23 15:51:58,292 - INFO - Imputing 3929 NaN rows out of 97359 with forward

((79909, 50, 120, 15),
 (79909, 50),
 (79909, 50),
 (79909, 50),
 (7251, 50, 120, 15),
 (7251, 50),
 (7251, 50),
 (7251, 50))

In [4]:
train_trajectory_loader = TrajectoryDataset(X_train, next_return_train, spread_train, trajectory_length=16).as_dataloader(
    batch_size=8, 
    shuffle=True,
    num_workers=8,
    prefetch_factor=4,
    pin_memory=True,
    persistent_workers=True,
    drop_last=True,
)
val_trajectory_loader = TrajectoryDataset(X_test, next_return_test, spread_test, trajectory_length=16).as_dataloader(
    batch_size=8, 
    shuffle=False,
    num_workers=8,
    prefetch_factor=4,
    pin_memory=True,
    persistent_workers=True,
    drop_last=True,
)

In [5]:
env = PortfolioEnvironment(
    reward_function=estimated_return_reward,
    transaction_fee=0
    )

In [6]:
signal_predictor = config.model_config.model.to(torch.device('cuda'))
signal_predictor.load_state_dict(torch.load('../modeling/checkpoints/best_model.pth'))
signal_predictor

  signal_predictor.load_state_dict(torch.load('../modeling/checkpoints/best_model.pth'))


TemporalSpatial(
  (asset_embed): Embedding(50, 32)
  (asset_proj): Linear(in_features=32, out_features=512, bias=False)
  (lstm): LSTM(15, 256, num_layers=4, batch_first=True, dropout=0.2, bidirectional=True)
  (spatial_attn): MultiheadAttention(
    (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
  )
  (fc): Linear(in_features=512, out_features=1, bias=True)
  (norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
  (dropout): Dropout(p=0.2, inplace=False)
)

In [7]:
actor = RlActor(
    signal_predictor, 
    n_assets=len(config.data_config.symbol_or_symbols),
    hidden_dim=128,
    train_signal_predictor=False)

In [8]:
rl_agent = RlAgent(
    actor, 
    env
)

metrics_calculator = MetricsCalculator(
    metrics=DEFAULT_METRICS
)
policy_gradient = PolicyGradient(
    rl_agent, 
    train_trajectory_loader, 
    val_trajectory_loader, 
    metrics_calculator=metrics_calculator,
    optimizer=torch.optim.Adam([p for p in actor.parameters() if p.requires_grad], lr=1e-3),
    scheduler=None,
    loss_fn=log_cumulative_trajectory_return_loss,
    num_epochs=10
)

In [10]:
policy_gradient.train()

                                                         

RuntimeError: Couldn't open shared file mapping: <torch_42056_1779087268_1>, error code: <1450>