In [15]:
import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

import pandas as pd
import torch
from datetime import datetime, timezone
import logging

logging.basicConfig(
    level=logging.INFO,  # Set the logging level
    format='%(asctime)s - %(levelname)s - %(message)s',  # Format for the log messages
    handlers=[
        logging.StreamHandler()  # Log to the console
    ]
)

%reload_ext autoreload
%autoreload 2
from data.raw.retrievers.alpaca_markets_retriever import AlpacaMarketsRetriever
from config.constants import *
from data.processed.dataset_creation import DatasetCreator
from data.processed.indicators import *
from data.processed.targets import Balanced3ClassClassification
from data.processed.normalization import ZScoreOverWindowNormalizer, ZScoreNormalizer, MinMaxNormalizer
from data.processed.dataset_pytorch import DatasetPytorch
from modeling.trainer import Trainer
from modeling.evaluate import evaluate_lgb_regressor, evaluate_torch_regressor, evaluate_torch_regressor_multiasset
from observability.mlflow_integration import log_experiment

from modeling.rl.environment import PortfolioEnvironment
from modeling.rl.state import State
from modeling.rl.agent import RlAgent
from modeling.rl.algorithms.policy_gradient import PolicyGradient
from modeling.rl.actors.actor import RlActor
from modeling.rl.trajectory_dataset import TrajectoryDataset

from config.experiments.cur_experiment import config

torch.backends.cudnn.benchmark = config.train_config.cudnn_benchmark


In [3]:
retriever = AlpacaMarketsRetriever(download_from_gdrive=False)

retrieval_result = retriever.bars_with_quotes(
    symbol_or_symbols=config.data_config.symbol_or_symbols, 
    start=config.data_config.start, 
    end=config.data_config.end)

In [4]:
dataset_creator = DatasetCreator(
    features=config.data_config.features,
    target=config.data_config.target,
    normalizer=config.data_config.normalizer,
    missing_values_handler=config.data_config.missing_values_handler,
    train_set_last_date=config.data_config.train_set_last_date, 
    in_seq_len=config.data_config.in_seq_len,
    multi_asset_prediction=config.data_config.multi_asset_prediction,
)

X_train, y_train, next_return_train, spread_train, X_test, y_test, next_return_test, spread_test = dataset_creator.create_dataset_numpy(retrieval_result)
X_train.shape, y_train.shape, next_return_train.shape, spread_train.shape, X_test.shape, y_test.shape, next_return_test.shape, spread_test.shape

2025-07-20 18:21:24,113 - INFO - Processing AAPL …
2025-07-20 18:21:24,452 - INFO - Imputing 496 NaN rows out of 97359 with forward fill..
2025-07-20 18:21:24,785 - INFO - Imputing 39 NaN rows with 0.5 sentinel value
2025-07-20 18:21:24,803 - INFO - Processing AMD …
2025-07-20 18:21:25,131 - INFO - Imputing 214 NaN rows out of 97359 with forward fill..
2025-07-20 18:21:25,445 - INFO - Imputing 39 NaN rows with 0.5 sentinel value
2025-07-20 18:21:25,461 - INFO - Processing BABA …
2025-07-20 18:21:25,774 - INFO - Imputing 874 NaN rows out of 97359 with forward fill..
2025-07-20 18:21:26,088 - INFO - Imputing 39 NaN rows with 0.5 sentinel value
2025-07-20 18:21:26,108 - INFO - Processing BITU …
2025-07-20 18:21:26,628 - INFO - Imputing 6493 NaN rows out of 97359 with forward fill..
2025-07-20 18:21:26,946 - INFO - Imputing 39 NaN rows with 0.5 sentinel value
2025-07-20 18:21:26,963 - INFO - Processing CSCO …
2025-07-20 18:21:27,241 - INFO - Imputing 3929 NaN rows out of 97359 with forward

((79909, 50, 120, 15),
 (79909, 50),
 (79909, 50),
 (79909, 50),
 (7251, 50, 120, 15),
 (7251, 50),
 (7251, 50),
 (7251, 50))

In [21]:
train_trajectory_loader = TrajectoryDataset(X_train, next_return_train, spread_train, trajectory_length=16).as_dataloader(
    batch_size=8, 
    shuffle=True,
    num_workers=0,
    prefetch_factor=None,
    pin_memory=False,
    persistent_workers=False,
)
val_trajectory_loader = None

In [22]:
env = PortfolioEnvironment(transaction_fee=0)

In [23]:
signal_predictor = config.model_config.model.to(torch.device('cuda'))
signal_predictor.load_state_dict(torch.load('best_model.pth'))
signal_predictor

  signal_predictor.load_state_dict(torch.load('best_model.pth'))


TemporalSpatial(
  (asset_embed): Embedding(50, 32)
  (asset_proj): Linear(in_features=32, out_features=512, bias=False)
  (lstm): LSTM(15, 256, num_layers=4, batch_first=True, dropout=0.2, bidirectional=True)
  (spatial_attn): MultiheadAttention(
    (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
  )
  (fc): Linear(in_features=512, out_features=1, bias=True)
  (norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
  (dropout): Dropout(p=0.2, inplace=False)
)

In [24]:
actor = RlActor(signal_predictor, n_assets=len(config.data_config.symbol_or_symbols))

In [27]:
rl_agent = RlAgent(actor, env)
policy_gradient = PolicyGradient(rl_agent, train_trajectory_loader, val_trajectory_loader)

In [29]:
policy_gradient.train(epochs=10)

                                                             

[PolicyGradient] Epoch 1/10 — Loss: 0.0034


                                                             

[PolicyGradient] Epoch 2/10 — Loss: 0.0026


                                                             

[PolicyGradient] Epoch 3/10 — Loss: 0.0021


                                                             

[PolicyGradient] Epoch 4/10 — Loss: 0.0018


                                                             

[PolicyGradient] Epoch 5/10 — Loss: 0.0016


                                                             

[PolicyGradient] Epoch 6/10 — Loss: 0.0013


                                                             

[PolicyGradient] Epoch 7/10 — Loss: 0.0011


                                                             

[PolicyGradient] Epoch 8/10 — Loss: 0.0011


                                                             

[PolicyGradient] Epoch 9/10 — Loss: 0.0009


                                                              

[PolicyGradient] Epoch 10/10 — Loss: 0.0006
