In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.callbacks import EvalCallback
import stable_baselines3
from portfolio_env_framework import *
import random
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy.typing as npt
import gymnasium as gym

In [2]:
class ProfitReward(AbstractRewardManager):
    """Reward manager whose reward is the raw change in portfolio value."""

    def __init__(self):
        """Create the manager; it keeps no state of its own."""

    def initialize_reward(self):
        """Nothing to initialise: the reward depends only on the two values passed in."""

    def compute_reward(self, old_port_val: float, new_port_val: float) -> float:
        """Return ``new_port_val - old_port_val``.

        Positive when the portfolio gained value, negative when it lost value.
        """
        profit = new_port_val - old_port_val
        return profit

In [3]:
class TrainDataManager(AbstractDataManager):
    """Data manager that serves 2010-2019 prices for a fixed list of tickers.

    ``get_data`` must run before ``get_obs_space`` / ``get_state`` /
    ``get_prices`` because it populates ``universe_size``, ``times`` and the
    price frames those methods read.
    """

    def get_obs_space(self) -> gym.spaces.Dict:
        """Return the Dict observation space: a per-asset ``data`` matrix and the ``weights`` vector."""
        return gym.spaces.Dict({
            'data': gym.spaces.Box(low=-np.inf, high=np.inf, shape=(self.universe_size, 6), dtype=np.float32),
            'weights': gym.spaces.Box(low=0, high=1, shape=(self.universe_size+1,), dtype=np.float32)
        })

    def get_data(self) -> tuple[int, int]:
        """Load the CSV, build the price frames and return ``(num_time_periods, universe_size)``.

        Side effects: sets ``stock_df``, ``high_df``, ``low_df``, ``ret``,
        ``times``, ``tickers``, ``num_time_periods`` and ``universe_size``.
        """
        numeric_cols = ['PRC', 'VOL', 'ASKHI', 'BIDLO', 'FACPR']

        # read SNP data; every column is loaded as a string and converted below
        raw = pd.read_csv('crsp_snp100_2010_to_2024.csv', dtype='string')

        # work on an explicit copy so the column assignments below do not
        # write into a view of ``raw``
        df = raw[['date', 'TICKER'] + numeric_cols].copy()
        df['date'] = pd.to_datetime(df['date'])
        df['FACPR'] = df['FACPR'].fillna('0.0')

        # drop duplicates and nans
        df = df.drop_duplicates(subset=['date', 'TICKER'])
        df = df.dropna()

        # only include the requested stocks, and only if they have as many
        # rows as the best-covered ticker in the file
        wanted = ['AAPL', 'AMD', 'AMZN', 'IBM']
        counts = df['TICKER'].value_counts()
        ticker_ok = counts == counts.max()
        allowed = [ticker for ticker in wanted if ticker_ok.get(ticker, False)]
        df = df[df['TICKER'].isin(allowed)]
        df = df[df['date'].dt.year.between(2010, 2019)]

        # convert datatypes (the original called ``df.astype`` but discarded
        # the result, so the frame stayed string-typed); done after filtering
        # so only the rows that are actually used get parsed
        df = df.astype({col: float for col in numeric_cols})

        # create stock array
        self.stock_df = df.pivot(index='date', columns='TICKER', values='PRC')
        self.high_df = df.pivot(index='date', columns='TICKER', values='ASKHI')
        self.low_df = df.pivot(index='date', columns='TICKER', values='BIDLO')

        # adjust for stock splits: one cumulative factor shared by all three frames
        facpr_df = df.pivot(index='date', columns='TICKER', values='FACPR')
        split_factor = (1 + facpr_df).cumprod(axis=0)
        self.stock_df = self.stock_df * split_factor
        self.high_df = self.high_df * split_factor
        self.low_df = self.low_df * split_factor
        self.ret = np.log(self.stock_df.pct_change().iloc[1:, :] + 1)

        # get times and tickers (the first date is skipped)
        # NOTE(review): ``tickers`` is in order of first appearance in the
        # CSV, whereas the pivoted frames order their columns by label --
        # confirm nothing relies on the two orders matching.
        self.times = df.date.unique()[1:]
        self.tickers = df.TICKER.unique()

        # NOTE(review): the 15-step margin is larger than the 5-row window
        # read by get_state; presumably left over from a longer lookback.
        self.num_time_periods = len(self.times)-15-1
        self.universe_size = len(self.tickers)
        print(f"{self.universe_size=}")
        return self.num_time_periods, self.universe_size

    def get_state(self, t: int, w: npt.NDArray[np.float64], port_val: np.float64) -> dict[str, npt.NDArray[np.float32]]:
        """Build the observation dict for step ``t``.

        ``data`` has one row per asset: column 0 is a fixed ``linspace(0, 1)``
        value per asset and columns 1-5 are the prices at ``times[t:t+5]``.
        ``weights`` is ``w`` cast to float32. ``port_val`` is not used.
        """
        # Every column is overwritten below, so no random fill is needed;
        # float32 matches the dtype declared in get_obs_space.
        s = np.empty((self.universe_size, 6), dtype=np.float32)
        s[:, 0] = np.linspace(0, 1, self.universe_size)
        # NOTE(review): this window covers times[t] .. times[t+4], one row
        # past the times[t+3] row returned by get_prices(t) -- confirm this
        # is intended and not look-ahead.
        s[:, 1:] = self.stock_df.loc[self.times[t:t+5], :].to_numpy().T
        return {'data': s, 'weights': np.asarray(w, dtype=np.float32)}

    def get_prices(self, t: int) -> npt.NDArray[np.float64]:
        """Return the asset prices at ``times[t+3]`` with a trailing ``1.0`` appended."""
        # today is self.times[t+3]
        return np.append(self.stock_df.loc[self.times[t+3], :].to_numpy().flatten(), 1.0)

In [4]:
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor

class Custom_EIEE_CNN_Extractor(BaseFeaturesExtractor):
    """Feature extractor for the ``{'data', 'weights'}`` Dict observation.

    ``data`` is passed through two ``(1, 3)`` convolutions, flattened and
    concatenated with the flattened ``weights`` vector.

    :param observation_space: Dict space with a 2-D ``data`` entry of shape
        ``(universe_size, data_len)`` and a ``weights`` entry.
    :param features_dim: kept for signature compatibility. The width actually
        reported to the base class is derived from ``observation_space`` so
        that it always matches what ``forward`` returns; for a ``(4, 6)``
        ``data`` entry and 5 weights this is the original default of 37.
    :raises ValueError: if ``data_len`` is too short for the two convolutions.
    """

    _KERNEL_WIDTH = 3
    _OUT_CHANNELS = 4

    def __init__(self, observation_space: gym.spaces.Dict, features_dim: int = 37):
        universe_size, data_len = observation_space['data'].shape
        weights_dim = int(np.prod(observation_space['weights'].shape))

        # Each un-padded (1, 3) convolution trims (kernel - 1) columns.
        conv_width = data_len - 2 * (self._KERNEL_WIDTH - 1)
        if conv_width < 1:
            raise ValueError(
                f"'data' needs at least {2 * (self._KERNEL_WIDTH - 1) + 1} columns, got {data_len}"
            )
        derived_dim = self._OUT_CHANNELS * universe_size * conv_width + weights_dim

        super().__init__(observation_space, derived_dim)
        self.universe_size = universe_size
        # No explicit .cuda(): the module is left on the default device so it
        # also constructs on CPU-only hosts; the owning policy is expected to
        # move its sub-modules to the model's device.
        self.cnn = nn.Sequential(
            nn.Conv2d(1, 2, kernel_size=(1, self._KERNEL_WIDTH)),
            nn.ReLU(),
            nn.Conv2d(2, self._OUT_CHANNELS, kernel_size=(1, self._KERNEL_WIDTH))
        )

    def forward(self, observations: dict[str, torch.Tensor]) -> torch.Tensor:
        """Return the concatenated CNN features and weights, one row per batch item."""
        # Insert a singleton channel axis: (batch, assets, cols) -> (batch, 1, assets, cols).
        conv_out = self.cnn(observations['data'][:, None, :, :])
        return torch.cat(
            (conv_out.flatten(start_dim=1), observations['weights'].flatten(start_dim=1)),
            dim=1,
        )

In [5]:
# Train DDPG on a single portfolio environment
from stable_baselines3 import DDPG, SAC
from stable_baselines3.common.noise import NormalActionNoise

train_env = PortfolioEnvWithTCost(dm=TrainDataManager(), rm=ProfitReward(), cp=0.01, cs=0.01)

# Set seeds
random.seed(42)
np.random.seed(42)
train_env.action_space.seed(43)
torch.manual_seed(42)

# Size the exploration noise from the action space instead of hard-coding 5,
# so it keeps matching when the ticker list (and thus the action size) changes.
n_actions = train_env.action_space.shape[-1]
action_noise = NormalActionNoise(mean=np.zeros(n_actions), sigma=0.02*np.ones(n_actions))

model = DDPG('MultiInputPolicy', train_env, buffer_size=5*10**7, verbose=1, policy_kwargs={
  'features_extractor_class': Custom_EIEE_CNN_Extractor,
}, action_noise=action_noise)
# total_timesteps is effectively unbounded; the recorded run below was
# stopped by interrupting the kernel.
model.learn(total_timesteps=10**10, log_interval=1)

self.universe_size=4
Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


  self.new_port_val = self.port_val * self.mu * (self.y @ self.w)


----------------------------------
| rollout/           |           |
|    ep_len_mean     | 2.5e+03   |
|    ep_rew_mean     | -1.27e+05 |
| time/              |           |
|    episodes        | 1         |
|    fps             | 109       |
|    time_elapsed    | 22        |
|    total_timesteps | 2499      |
| train/             |           |
|    actor_loss      | 887       |
|    critic_loss     | 184       |
|    learning_rate   | 0.001     |
|    n_updates       | 2398      |
----------------------------------


  self.new_port_val = self.port_val * self.mu * (self.y @ self.w)


----------------------------------
| rollout/           |           |
|    ep_len_mean     | 2.5e+03   |
|    ep_rew_mean     | -1.25e+05 |
| time/              |           |
|    episodes        | 2         |
|    fps             | 109       |
|    time_elapsed    | 45        |
|    total_timesteps | 4998      |
| train/             |           |
|    actor_loss      | 1.36e+03  |
|    critic_loss     | 272       |
|    learning_rate   | 0.001     |
|    n_updates       | 4897      |
----------------------------------


  self.new_port_val = self.port_val * self.mu * (self.y @ self.w)


----------------------------------
| rollout/           |           |
|    ep_len_mean     | 2.5e+03   |
|    ep_rew_mean     | -1.24e+05 |
| time/              |           |
|    episodes        | 3         |
|    fps             | 107       |
|    time_elapsed    | 69        |
|    total_timesteps | 7497      |
| train/             |           |
|    actor_loss      | 1.75e+03  |
|    critic_loss     | 1.13e+03  |
|    learning_rate   | 0.001     |
|    n_updates       | 7396      |
----------------------------------


  self.new_port_val = self.port_val * self.mu * (self.y @ self.w)


----------------------------------
| rollout/           |           |
|    ep_len_mean     | 2.5e+03   |
|    ep_rew_mean     | -1.24e+05 |
| time/              |           |
|    episodes        | 4         |
|    fps             | 107       |
|    time_elapsed    | 93        |
|    total_timesteps | 9996      |
| train/             |           |
|    actor_loss      | 2.13e+03  |
|    critic_loss     | 590       |
|    learning_rate   | 0.001     |
|    n_updates       | 9895      |
----------------------------------


  self.new_port_val = self.port_val * self.mu * (self.y @ self.w)


----------------------------------
| rollout/           |           |
|    ep_len_mean     | 2.5e+03   |
|    ep_rew_mean     | -1.24e+05 |
| time/              |           |
|    episodes        | 5         |
|    fps             | 106       |
|    time_elapsed    | 116       |
|    total_timesteps | 12495     |
| train/             |           |
|    actor_loss      | 2.45e+03  |
|    critic_loss     | 615       |
|    learning_rate   | 0.001     |
|    n_updates       | 12394     |
----------------------------------


  self.new_port_val = self.port_val * self.mu * (self.y @ self.w)


----------------------------------
| rollout/           |           |
|    ep_len_mean     | 2.5e+03   |
|    ep_rew_mean     | -1.23e+05 |
| time/              |           |
|    episodes        | 6         |
|    fps             | 107       |
|    time_elapsed    | 139       |
|    total_timesteps | 14994     |
| train/             |           |
|    actor_loss      | 2.7e+03   |
|    critic_loss     | 1.08e+03  |
|    learning_rate   | 0.001     |
|    n_updates       | 14893     |
----------------------------------


  self.new_port_val = self.port_val * self.mu * (self.y @ self.w)


----------------------------------
| rollout/           |           |
|    ep_len_mean     | 2.5e+03   |
|    ep_rew_mean     | -1.23e+05 |
| time/              |           |
|    episodes        | 7         |
|    fps             | 106       |
|    time_elapsed    | 164       |
|    total_timesteps | 17493     |
| train/             |           |
|    actor_loss      | 2.98e+03  |
|    critic_loss     | 1.12e+03  |
|    learning_rate   | 0.001     |
|    n_updates       | 17392     |
----------------------------------


  self.new_port_val = self.port_val * self.mu * (self.y @ self.w)


----------------------------------
| rollout/           |           |
|    ep_len_mean     | 2.5e+03   |
|    ep_rew_mean     | -1.23e+05 |
| time/              |           |
|    episodes        | 8         |
|    fps             | 105       |
|    time_elapsed    | 188       |
|    total_timesteps | 19992     |
| train/             |           |
|    actor_loss      | 3.21e+03  |
|    critic_loss     | 995       |
|    learning_rate   | 0.001     |
|    n_updates       | 19891     |
----------------------------------


  self.new_port_val = self.port_val * self.mu * (self.y @ self.w)


----------------------------------
| rollout/           |           |
|    ep_len_mean     | 2.5e+03   |
|    ep_rew_mean     | -1.23e+05 |
| time/              |           |
|    episodes        | 9         |
|    fps             | 105       |
|    time_elapsed    | 212       |
|    total_timesteps | 22491     |
| train/             |           |
|    actor_loss      | 3.45e+03  |
|    critic_loss     | 1.75e+03  |
|    learning_rate   | 0.001     |
|    n_updates       | 22390     |
----------------------------------


  self.new_port_val = self.port_val * self.mu * (self.y @ self.w)


----------------------------------
| rollout/           |           |
|    ep_len_mean     | 2.5e+03   |
|    ep_rew_mean     | -1.23e+05 |
| time/              |           |
|    episodes        | 10        |
|    fps             | 105       |
|    time_elapsed    | 236       |
|    total_timesteps | 24990     |
| train/             |           |
|    actor_loss      | 3.63e+03  |
|    critic_loss     | 1.18e+03  |
|    learning_rate   | 0.001     |
|    n_updates       | 24889     |
----------------------------------


  self.new_port_val = self.port_val * self.mu * (self.y @ self.w)


----------------------------------
| rollout/           |           |
|    ep_len_mean     | 2.5e+03   |
|    ep_rew_mean     | -1.23e+05 |
| time/              |           |
|    episodes        | 11        |
|    fps             | 105       |
|    time_elapsed    | 261       |
|    total_timesteps | 27489     |
| train/             |           |
|    actor_loss      | 3.82e+03  |
|    critic_loss     | 2.6e+03   |
|    learning_rate   | 0.001     |
|    n_updates       | 27388     |
----------------------------------


  self.new_port_val = self.port_val * self.mu * (self.y @ self.w)


----------------------------------
| rollout/           |           |
|    ep_len_mean     | 2.5e+03   |
|    ep_rew_mean     | -1.23e+05 |
| time/              |           |
|    episodes        | 12        |
|    fps             | 105       |
|    time_elapsed    | 284       |
|    total_timesteps | 29988     |
| train/             |           |
|    actor_loss      | 3.95e+03  |
|    critic_loss     | 1.08e+03  |
|    learning_rate   | 0.001     |
|    n_updates       | 29887     |
----------------------------------


  self.new_port_val = self.port_val * self.mu * (self.y @ self.w)


----------------------------------
| rollout/           |           |
|    ep_len_mean     | 2.5e+03   |
|    ep_rew_mean     | -1.23e+05 |
| time/              |           |
|    episodes        | 13        |
|    fps             | 104       |
|    time_elapsed    | 310       |
|    total_timesteps | 32487     |
| train/             |           |
|    actor_loss      | 4.2e+03   |
|    critic_loss     | 1.89e+03  |
|    learning_rate   | 0.001     |
|    n_updates       | 32386     |
----------------------------------


  self.new_port_val = self.port_val * self.mu * (self.y @ self.w)


----------------------------------
| rollout/           |           |
|    ep_len_mean     | 2.5e+03   |
|    ep_rew_mean     | -1.23e+05 |
| time/              |           |
|    episodes        | 14        |
|    fps             | 104       |
|    time_elapsed    | 334       |
|    total_timesteps | 34986     |
| train/             |           |
|    actor_loss      | 4.3e+03   |
|    critic_loss     | 1.74e+03  |
|    learning_rate   | 0.001     |
|    n_updates       | 34885     |
----------------------------------


  self.new_port_val = self.port_val * self.mu * (self.y @ self.w)


----------------------------------
| rollout/           |           |
|    ep_len_mean     | 2.5e+03   |
|    ep_rew_mean     | -1.23e+05 |
| time/              |           |
|    episodes        | 15        |
|    fps             | 103       |
|    time_elapsed    | 363       |
|    total_timesteps | 37485     |
| train/             |           |
|    actor_loss      | 4.34e+03  |
|    critic_loss     | 1.84e+03  |
|    learning_rate   | 0.001     |
|    n_updates       | 37384     |
----------------------------------


  self.new_port_val = self.port_val * self.mu * (self.y @ self.w)


----------------------------------
| rollout/           |           |
|    ep_len_mean     | 2.5e+03   |
|    ep_rew_mean     | -1.23e+05 |
| time/              |           |
|    episodes        | 16        |
|    fps             | 102       |
|    time_elapsed    | 389       |
|    total_timesteps | 39984     |
| train/             |           |
|    actor_loss      | 4.47e+03  |
|    critic_loss     | 1.19e+03  |
|    learning_rate   | 0.001     |
|    n_updates       | 39883     |
----------------------------------


  self.new_port_val = self.port_val * self.mu * (self.y @ self.w)


----------------------------------
| rollout/           |           |
|    ep_len_mean     | 2.5e+03   |
|    ep_rew_mean     | -1.23e+05 |
| time/              |           |
|    episodes        | 17        |
|    fps             | 102       |
|    time_elapsed    | 413       |
|    total_timesteps | 42483     |
| train/             |           |
|    actor_loss      | 4.7e+03   |
|    critic_loss     | 1.08e+03  |
|    learning_rate   | 0.001     |
|    n_updates       | 42382     |
----------------------------------


  self.new_port_val = self.port_val * self.mu * (self.y @ self.w)


----------------------------------
| rollout/           |           |
|    ep_len_mean     | 2.5e+03   |
|    ep_rew_mean     | -1.23e+05 |
| time/              |           |
|    episodes        | 18        |
|    fps             | 102       |
|    time_elapsed    | 439       |
|    total_timesteps | 44982     |
| train/             |           |
|    actor_loss      | 4.75e+03  |
|    critic_loss     | 1e+03     |
|    learning_rate   | 0.001     |
|    n_updates       | 44881     |
----------------------------------


  self.new_port_val = self.port_val * self.mu * (self.y @ self.w)


----------------------------------
| rollout/           |           |
|    ep_len_mean     | 2.5e+03   |
|    ep_rew_mean     | -1.23e+05 |
| time/              |           |
|    episodes        | 19        |
|    fps             | 101       |
|    time_elapsed    | 467       |
|    total_timesteps | 47481     |
| train/             |           |
|    actor_loss      | 4.96e+03  |
|    critic_loss     | 908       |
|    learning_rate   | 0.001     |
|    n_updates       | 47380     |
----------------------------------


  self.new_port_val = self.port_val * self.mu * (self.y @ self.w)


----------------------------------
| rollout/           |           |
|    ep_len_mean     | 2.5e+03   |
|    ep_rew_mean     | -1.23e+05 |
| time/              |           |
|    episodes        | 20        |
|    fps             | 101       |
|    time_elapsed    | 492       |
|    total_timesteps | 49980     |
| train/             |           |
|    actor_loss      | 5.05e+03  |
|    critic_loss     | 734       |
|    learning_rate   | 0.001     |
|    n_updates       | 49879     |
----------------------------------


  self.new_port_val = self.port_val * self.mu * (self.y @ self.w)


----------------------------------
| rollout/           |           |
|    ep_len_mean     | 2.5e+03   |
|    ep_rew_mean     | -1.23e+05 |
| time/              |           |
|    episodes        | 21        |
|    fps             | 101       |
|    time_elapsed    | 519       |
|    total_timesteps | 52479     |
| train/             |           |
|    actor_loss      | 5.06e+03  |
|    critic_loss     | 1.74e+03  |
|    learning_rate   | 0.001     |
|    n_updates       | 52378     |
----------------------------------


  self.new_port_val = self.port_val * self.mu * (self.y @ self.w)


KeyboardInterrupt: 

In [3]:
class TrainDataManager(AbstractDataManager):
    """Data manager variant whose observations are random noise.

    Prices for the fixed ticker list (2010-2019) are still loaded for
    ``get_prices``, but ``get_state`` returns uniform random features, so the
    observation carries no price information.
    """

    def get_obs_space(self) -> gym.spaces.Dict:
        """Return the Dict observation space: a per-asset ``data`` matrix and the ``weights`` vector."""
        return gym.spaces.Dict({
            'data': gym.spaces.Box(low=-np.inf, high=np.inf, shape=(self.universe_size, 6), dtype=np.float32),
            'weights': gym.spaces.Box(low=0, high=1, shape=(self.universe_size+1,), dtype=np.float32)
        })

    def get_data(self) -> tuple[int, int]:
        """Load the CSV, build ``stock_df`` and return ``(num_time_periods, universe_size)``.

        Side effects: sets ``stock_df``, ``times``, ``tickers``,
        ``num_time_periods`` and ``universe_size``.
        """
        numeric_cols = ['PRC', 'VOL', 'ASKHI', 'BIDLO', 'FACPR']

        # every column is loaded as a string and converted below
        raw = pd.read_csv('crsp_snp100_2010_to_2024.csv', dtype='string')

        # explicit copy so the column assignments do not write into a view of ``raw``
        df = raw[['date', 'TICKER'] + numeric_cols].copy()
        df['date'] = pd.to_datetime(df['date'])
        df['FACPR'] = df['FACPR'].fillna('0.0')
        df = df.drop_duplicates(subset=['date', 'TICKER'])
        df = df.dropna()

        # keep only the requested tickers that have as many rows as the
        # best-covered ticker in the file
        wanted = ['AAPL', 'AMD', 'AMZN', 'IBM']
        counts = df['TICKER'].value_counts()
        ticker_ok = counts == counts.max()
        allowed = [ticker for ticker in wanted if ticker_ok.get(ticker, False)]
        df = df[df['TICKER'].isin(allowed)]
        df = df[df['date'].dt.year.between(2010, 2019)]

        # the original called ``df.astype`` but discarded the result; convert
        # after filtering so only the rows that are used get parsed
        df = df.astype({col: float for col in numeric_cols})

        # NOTE(review): FACPR is loaded and cleaned but never applied here, so
        # stock_df holds the PRC column as-is with no split adjustment --
        # confirm that is intended for this variant.
        self.stock_df = df.pivot(index='date', columns='TICKER', values='PRC')

        # the first date is skipped
        self.times = df.date.unique()[1:]
        self.tickers = df.TICKER.unique()
        self.num_time_periods = len(self.times)-15-1
        self.universe_size = len(self.tickers)

        print(f"{self.universe_size=}")
        return self.num_time_periods, self.universe_size

    def get_state(self, t: int, w: npt.NDArray[np.float64], port_val: np.float64) -> dict[str, npt.NDArray[np.float32]]:
        """Build the observation dict; ``t`` and ``port_val`` are not used.

        ``data`` column 0 is a fixed ``linspace(0, 1)`` value per asset and
        columns 1-5 are uniform random draws from the global NumPy RNG.
        ``weights`` is ``w`` cast to float32.
        """
        # float32 matches the dtype declared in get_obs_space
        s = np.random.rand(self.universe_size, 6).astype(np.float32)
        s[:, 0] = np.linspace(0, 1, self.universe_size)
        return {'data': s, 'weights': np.asarray(w, dtype=np.float32)}

    def get_prices(self, t: int) -> npt.NDArray[np.float64]:
        """Return the asset prices at ``times[t+3]`` with a trailing ``1.0`` appended."""
        # today is self.times[t+3]
        return np.append(self.stock_df.loc[self.times[t+3], :].to_numpy().flatten(), 1.0)

In [4]:
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor

class Custom_EIEE_CNN_Extractor(BaseFeaturesExtractor):
    """Feature extractor for the ``{'data', 'weights'}`` Dict observation.

    ``data`` is passed through two ``(1, 3)`` convolutions, flattened and
    concatenated with the flattened ``weights`` vector.

    :param observation_space: Dict space with a 2-D ``data`` entry of shape
        ``(universe_size, data_len)`` and a ``weights`` entry.
    :param features_dim: kept for signature compatibility. The width actually
        reported to the base class is derived from ``observation_space`` so
        that it always matches what ``forward`` returns; for a ``(4, 6)``
        ``data`` entry and 5 weights this is the original default of 37.
    :raises ValueError: if ``data_len`` is too short for the two convolutions.
    """

    _KERNEL_WIDTH = 3
    _OUT_CHANNELS = 4

    def __init__(self, observation_space: gym.spaces.Dict, features_dim: int = 37):
        universe_size, data_len = observation_space['data'].shape
        weights_dim = int(np.prod(observation_space['weights'].shape))

        # Each un-padded (1, 3) convolution trims (kernel - 1) columns.
        conv_width = data_len - 2 * (self._KERNEL_WIDTH - 1)
        if conv_width < 1:
            raise ValueError(
                f"'data' needs at least {2 * (self._KERNEL_WIDTH - 1) + 1} columns, got {data_len}"
            )
        derived_dim = self._OUT_CHANNELS * universe_size * conv_width + weights_dim

        super().__init__(observation_space, derived_dim)
        self.universe_size = universe_size
        # No explicit .cuda(): the module is left on the default device so it
        # also constructs on CPU-only hosts; the owning policy is expected to
        # move its sub-modules to the model's device.
        self.cnn = nn.Sequential(
            nn.Conv2d(1, 2, kernel_size=(1, self._KERNEL_WIDTH)),
            nn.ReLU(),
            nn.Conv2d(2, self._OUT_CHANNELS, kernel_size=(1, self._KERNEL_WIDTH))
        )

    def forward(self, observations: dict[str, torch.Tensor]) -> torch.Tensor:
        """Return the concatenated CNN features and weights, one row per batch item."""
        # Insert a singleton channel axis: (batch, assets, cols) -> (batch, 1, assets, cols).
        conv_out = self.cnn(observations['data'][:, None, :, :])
        return torch.cat(
            (conv_out.flatten(start_dim=1), observations['weights'].flatten(start_dim=1)),
            dim=1,
        )

In [7]:
# Train DDPG on a single portfolio environment
from stable_baselines3 import DDPG, SAC
from stable_baselines3.common.noise import NormalActionNoise

train_env = PortfolioEnvWithTCost(dm=TrainDataManager(), rm=ProfitReward(), cp=0.01, cs=0.01)

# Set seeds
random.seed(42)
np.random.seed(42)
train_env.action_space.seed(43)
torch.manual_seed(42)

# Size the exploration noise from the action space instead of hard-coding 5,
# so it keeps matching when the ticker list (and thus the action size) changes.
n_actions = train_env.action_space.shape[-1]
action_noise = NormalActionNoise(mean=np.zeros(n_actions), sigma=0.04*np.ones(n_actions))

model = DDPG('MultiInputPolicy', train_env, buffer_size=10**7, verbose=1, policy_kwargs={
  'features_extractor_class': Custom_EIEE_CNN_Extractor,
}, action_noise=action_noise)
# total_timesteps is effectively unbounded; the recorded run below was
# stopped by interrupting the kernel.
model.learn(total_timesteps=10**10, log_interval=1)

self.universe_size=4
Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


  self.new_port_val = self.port_val * self.mu * (self.y @ self.w)


----------------------------------
| rollout/           |           |
|    ep_len_mean     | 2.5e+03   |
|    ep_rew_mean     | -2.86e+04 |
| time/              |           |
|    episodes        | 1         |
|    fps             | 89        |
|    time_elapsed    | 27        |
|    total_timesteps | 2499      |
| train/             |           |
|    actor_loss      | 34.1      |
|    critic_loss     | 0.582     |
|    learning_rate   | 0.001     |
|    n_updates       | 2398      |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 2.5e+03   |
|    ep_rew_mean     | -1.47e+04 |
| time/              |           |
|    episodes        | 2         |
|    fps             | 87        |
|    time_elapsed    | 56        |
|    total_timesteps | 4998      |
| train/             |           |
|    actor_loss      | 28        |
|    critic_loss     | 0.0857    |
|    learning_rate   | 0.001     |
|    n_updates      

KeyboardInterrupt: 