In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.callbacks import EvalCallback
import stable_baselines3
import random
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy.typing as npt
import gymnasium as gym

In [42]:
class PortfolioEnvWithTCost(gym.Env):
    """Portfolio-allocation environment driven by an external data manager.

    The agent emits one value in ``[-1, 1]`` per asset plus one extra slot
    (``universe_size + 1`` values in total; ``reset`` puts the full initial
    weight on the last slot).  Each value is rescaled linearly to
    ``[w_lb, w_ub]`` and the vector is then normalised to sum to one.

    The reward implemented here is a shaping signal only:

    * ``-(sum(rescaled_action) - 1) ** 2`` penalises raw actions that do not
      already sum to one, and
    * ``-100 * ||weights - uniform|| ** 2`` pulls the normalised weights
      towards the equal-weight allocation.

    Parameters
    ----------
    dm:
        Data manager.  Must provide ``get_data() -> (num_time_periods,
        universe_size)``, ``get_obs_space()`` and ``get_state(t, weights)``.
    w_lb, w_ub:
        Lower / upper bound that an action component of -1 / +1 maps to.
    cp, cs:
        Stored on the instance but not used by the code in this class
        (presumably purchase / sale cost rates -- TODO confirm).
    logging:
        Stored on the instance but not used by the code in this class.

    Raises
    ------
    AssertionError
        If ``w_lb > w_ub``.
    """

    def __init__(
        self,
        dm,
        w_lb=0, w_ub=1,
        cp=0.0, cs=0.0,
        logging=True
    ):
        # Validate the bounds before touching the data manager.
        assert w_lb <= w_ub, f"w_lb ({w_lb}) must not exceed w_ub ({w_ub})"

        # register managers
        self.dm = dm

        # set constants
        self.cp, self.cs = cp, cs
        self.logging = logging
        self.w_lb, self.w_ub = w_lb, w_ub

        # get data, set problem size
        self.num_time_periods, self.universe_size = self.dm.get_data()

        # set spaces
        self.observation_space = self.dm.get_obs_space()
        self.action_space = gym.spaces.Box(
            low=-1,
            high=1,
            shape=(self.universe_size + 1,),
            dtype=np.float64
        )

        # Episode state; properly initialised by reset().  Defined here so
        # the attributes always exist on a constructed environment.
        self.t = 0
        self.state = None
        self.reward = 0.0

    def step(self, action: npt.NDArray[np.float64]) -> tuple:
        """Advance one period.

        Returns the Gymnasium 5-tuple ``(obs, reward, terminated, truncated,
        info)``.  ``terminated`` becomes true once ``num_time_periods`` steps
        have been taken; ``truncated`` is always false.
        """
        action = np.asarray(action, dtype=np.float64)

        # Map [-1, 1] -> [w_lb, w_ub].
        rescaled_action = (self.w_ub - self.w_lb) * (action + 1) / 2.0 + self.w_lb
        total = rescaled_action.sum()
        reward = -1 * (total - 1) ** 2

        weights = rescaled_action.flatten()
        if np.isclose(total, 0.0):
            # Normalising would divide by zero and feed NaNs to the data
            # manager and the reward; fall back to the allocation used by
            # reset() (everything in the last slot).
            weights = np.zeros_like(weights)
            weights[-1] = 1.0
        else:
            weights = weights / total

        # Quadratic pull towards the equal-weight allocation.
        uniform = np.full_like(weights, 1.0 / weights.size)
        reward += -100 * ((weights - uniform) ** 2).sum()
        self.reward = float(reward)

        self.t += 1
        self.state = self.dm.get_state(self.t, weights)

        # NOTE(review): the end of the data horizon is reported in the
        # `terminated` slot, exactly as before.  Since it is a time limit,
        # reporting it as `truncated` may be more appropriate -- confirm
        # before changing, as it alters value bootstrapping.
        terminated = self.t >= self.num_time_periods
        truncated = False
        return self.state.copy(), self.reward, terminated, truncated, {}

    def reset(self, *args, **kwargs) -> tuple[np.ndarray, dict]:
        """Start a new episode with the full weight on the last slot.

        A ``seed`` keyword, if given, is forwarded to ``gym.Env.reset`` so
        ``self.np_random`` is seeded as the Gymnasium API expects.  The
        observation itself is produced by the data manager.
        """
        super().reset(seed=kwargs.get("seed"))

        w = np.zeros(self.universe_size + 1, dtype=float)
        w[-1] = 1.0

        self.t = 0
        self.reward = 0.0
        self.state = self.dm.get_state(self.t, w)
        return self.state.copy(), {}

In [43]:
class ProfitReward():
    """Reward equal to the change in portfolio value between two instants."""

    def __init__(self):
        """Nothing to configure: the reward keeps no state."""

    def initialize_reward(self):
        """No-op hook; there is nothing to (re)initialise."""
        return None

    def compute_reward(self, old_port_val: float, new_port_val: float) -> float:
        """Return the profit ``new_port_val - old_port_val``.

        A negative result means the portfolio lost value.
        """
        profit = new_port_val - old_port_val
        return profit

In [44]:
class TrainDataManager():
    """Synthetic data source for the portfolio environment.

    Produces random feature blocks of shape ``(universe_size, NUM_FEATURES)``
    whose first column is replaced by an evenly spaced ramp from 0 to 1.
    Randomness comes from NumPy's global generator, so ``np.random.seed``
    controls it.
    """

    # Fixed dimensions of the synthetic problem.
    NUM_TIME_PERIODS = 2500
    UNIVERSE_SIZE = 4
    NUM_FEATURES = 6

    # Class-level default so get_obs_space() / get_state() also work when
    # get_data() has not been called yet.
    universe_size = UNIVERSE_SIZE

    def get_obs_space(self) -> "gym.spaces.Dict":
        """Return the Dict observation space with keys 'data' and 'weights'."""
        n_assets = self.universe_size
        return gym.spaces.Dict({
            'data': gym.spaces.Box(
                low=-np.inf, high=np.inf,
                shape=(n_assets, self.NUM_FEATURES), dtype=np.float32,
            ),
            'weights': gym.spaces.Box(
                low=0, high=1, shape=(n_assets + 1,), dtype=np.float32,
            ),
        })

    def get_data(self) -> tuple[int, int]:
        """Set the problem size and return ``(num_time_periods, universe_size)``."""
        self.universe_size = self.UNIVERSE_SIZE
        print(f"universe_size: {self.universe_size}")
        return self.NUM_TIME_PERIODS, self.universe_size

    def get_state(self, t: int, w: npt.NDArray[np.float64]) -> dict[str, np.ndarray]:
        """Build the observation for the given portfolio weights.

        ``t`` is accepted for interface compatibility but is not used: the
        feature block is freshly sampled on every call.  Both arrays are
        returned as float32 to match the dtype declared by
        ``get_obs_space``; ``w`` is copied, never aliased.
        """
        s = np.random.rand(self.universe_size, self.NUM_FEATURES)
        s[:, 0] = np.linspace(0, 1, self.universe_size)
        return {
            'data': s.astype(np.float32),
            'weights': np.array(w, dtype=np.float32),
        }

In [45]:
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor

class Custom_EIEE_CNN_Extractor(BaseFeaturesExtractor):
    """Feature extractor for Dict observations with 'data' and 'weights'.

    Each row of the 2-D 'data' block is convolved independently along its
    second axis by two ``(1, 3)`` convolutions (1 -> 2 -> 4 channels).  The
    flattened result is concatenated with the flattened 'weights' vector.

    Parameters
    ----------
    observation_space:
        Dict space containing a 2-D 'data' Box and a 'weights' Box.
    features_dim:
        Kept for backward compatibility.  The dimension actually registered
        is derived from ``observation_space`` so that it always matches what
        ``forward`` returns; for a (4, 6) data block with 5 weights this is
        the historical default of 37.

    Raises
    ------
    ValueError
        If the second axis of 'data' is too short for the two convolutions.
    """

    _KERNEL_WIDTH = 3
    _OUT_CHANNELS = 4

    def __init__(self, observation_space: gym.spaces.Dict, features_dim: int = 37):
        universe_size, data_len = observation_space['data'].shape

        # Two un-padded convolutions each shorten the row by (kernel - 1).
        out_width = data_len - 2 * (self._KERNEL_WIDTH - 1)
        if out_width < 1:
            raise ValueError(
                f"'data' rows need at least {2 * (self._KERNEL_WIDTH - 1) + 1} "
                f"columns, got {data_len}"
            )
        weights_dim = int(np.prod(observation_space['weights'].shape))
        actual_dim = self._OUT_CHANNELS * universe_size * out_width + weights_dim

        super().__init__(observation_space, actual_dim)
        self.universe_size = universe_size

        # No explicit .cuda(): the module is left on the default device so it
        # also works on CPU-only machines; Stable-Baselines3 moves the whole
        # policy (including this extractor) to the model's device.
        self.cnn = nn.Sequential(
            nn.Conv2d(1, 2, kernel_size=(1, self._KERNEL_WIDTH)),
            nn.ReLU(),
            nn.Conv2d(2, self._OUT_CHANNELS, kernel_size=(1, self._KERNEL_WIDTH)),
        )

    def forward(self, observations: dict[str, torch.Tensor]) -> torch.Tensor:
        """Return the concatenated CNN features and weights, one row per sample."""
        # Insert a singleton channel axis: (batch, rows, cols) -> (batch, 1, rows, cols).
        conv_out = self.cnn(observations['data'][:, None, :, :])
        flat_conv = conv_out.flatten(start_dim=1)
        flat_weights = observations['weights'].flatten(start_dim=1)
        return torch.cat((flat_conv, flat_weights), dim=1)

In [46]:
# Train DDPG on a single (non-vectorised) synthetic portfolio environment.
from stable_baselines3 import DDPG, SAC
from stable_baselines3.common.noise import NormalActionNoise

# Tunables for this run.
SEED = 42
ACTION_SPACE_SEED = 43
NOISE_SIGMA = 0.02
REPLAY_BUFFER_SIZE = 10**7
# Effectively "run until interrupted": training is stopped by hand.
TOTAL_TIMESTEPS = 10**10

train_env = PortfolioEnvWithTCost(dm=TrainDataManager(), cp=0.01, cs=0.01)

# Set seeds
random.seed(SEED)
np.random.seed(SEED)
train_env.action_space.seed(ACTION_SPACE_SEED)
torch.manual_seed(SEED)

# Size the exploration noise from the environment instead of hard-coding it,
# so it keeps matching the action vector if the universe size changes.
n_actions = train_env.action_space.shape[-1]
action_noise = NormalActionNoise(
    mean=np.zeros(n_actions),
    sigma=NOISE_SIGMA * np.ones(n_actions),
)

model = DDPG(
    'MultiInputPolicy',
    train_env,
    buffer_size=REPLAY_BUFFER_SIZE,
    verbose=1,
    policy_kwargs={
        'features_extractor_class': Custom_EIEE_CNN_Extractor,
        # 'net_arch': [50, 50],
    },
    action_noise=action_noise,
)
model.learn(total_timesteps=TOTAL_TIMESTEPS, log_interval=1)

universe_size: 4
Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 2.5e+03   |
|    ep_rew_mean     | -2.57e+03 |
| time/              |           |
|    episodes        | 1         |
|    fps             | 124       |
|    time_elapsed    | 20        |
|    total_timesteps | 2500      |
| train/             |           |
|    actor_loss      | 5.73      |
|    critic_loss     | 0.000801  |
|    learning_rate   | 0.001     |
|    n_updates       | 2399      |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 2.5e+03   |
|    ep_rew_mean     | -1.34e+03 |
| time/              |           |
|    episodes        | 2         |
|    fps             | 121       |
|    time_elapsed    | 41        |
|    total_timesteps | 5000      |
| train/             |           |
|    actor_lo

KeyboardInterrupt: 

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.callbacks import EvalCallback
import stable_baselines3
import random
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy.typing as npt
import gymnasium as gym

In [2]:
class PortfolioEnvWithTCost(gym.Env):
    """Self-contained toy environment for debugging the learning set-up.

    Observations are a Dict with a random ``(4, 6)`` 'data' block (first
    column replaced by a 0..1 ramp) and the 5 'weights' derived from the
    last action.  The reward pulls the rescaled action towards the fixed
    target ``linspace(0, 1, 5) / sum`` and penalises actions that do not sum
    to one.  The environment never ends on its own; wrap it in a time limit
    to obtain finite episodes.
    """

    UNIVERSE_SIZE = 4
    NUM_FEATURES = 6
    NUM_WEIGHTS = UNIVERSE_SIZE + 1

    def __init__(self):
        self.action_space = gym.spaces.Box(
            low=-1, high=1, shape=(self.NUM_WEIGHTS,), dtype=np.float64
        )
        self.observation_space = gym.spaces.Dict({
            'data': gym.spaces.Box(
                low=-np.inf, high=np.inf,
                shape=(self.UNIVERSE_SIZE, self.NUM_FEATURES), dtype=np.float32,
            ),
            'weights': gym.spaces.Box(
                low=0, high=1, shape=(self.NUM_WEIGHTS,), dtype=np.float32
            ),
        })

    def _sample_data(self) -> np.ndarray:
        """Draw a fresh 'data' block from NumPy's global generator."""
        x = np.random.rand(self.UNIVERSE_SIZE, self.NUM_FEATURES)
        x[:, 0] = np.linspace(0, 1, self.UNIVERSE_SIZE)
        # Cast to the dtype declared in observation_space.
        return x.astype(np.float32)

    def step(self, action) -> tuple:
        """Score ``action`` and return ``(obs, reward, False, False, {})``."""
        # Map [-1, 1] -> [0, 1].
        weights = (np.asarray(action, dtype=np.float64) + 1) / 2

        target = np.linspace(0, 1, self.NUM_WEIGHTS)
        target = target / target.sum()
        reward = -100 * ((weights - target) ** 2).sum()
        reward -= (weights.sum() - 1) ** 2

        obs = {
            'data': self._sample_data(),
            'weights': weights.astype(np.float32),
        }
        # Neither terminated nor truncated is ever raised here.  (The former
        # `reward < -10` stop condition was dead code: it was overwritten
        # with False on the following line.)
        return obs, float(reward), False, False, {}

    def reset(self, *args, **kwargs):
        """Return the initial observation with the full weight on the last slot.

        A ``seed`` keyword, if given, is forwarded to ``gym.Env.reset`` as
        the Gymnasium API expects.  The 'data' block still comes from
        NumPy's global generator, so ``np.random.seed`` is what controls it.
        """
        super().reset(seed=kwargs.get("seed"))

        w0 = np.zeros(self.NUM_WEIGHTS, dtype=np.float32)
        w0[-1] = 1.0
        return {
            'data': self._sample_data(),
            'weights': w0
        }, {}

In [3]:
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor

class Custom_EIEE_CNN_Extractor(BaseFeaturesExtractor):
    """Feature extractor for Dict observations with 'data' and 'weights'.

    Each row of the 2-D 'data' block is convolved independently along its
    second axis by two ``(1, 3)`` convolutions (1 -> 2 -> 4 channels).  The
    flattened result is concatenated with the flattened 'weights' vector.

    Parameters
    ----------
    observation_space:
        Dict space containing a 2-D 'data' Box and a 'weights' Box.
    features_dim:
        Kept for backward compatibility.  The dimension actually registered
        is derived from ``observation_space`` so that it always matches what
        ``forward`` returns; for a (4, 6) data block with 5 weights this is
        the historical default of 37.

    Raises
    ------
    ValueError
        If the second axis of 'data' is too short for the two convolutions.
    """

    _KERNEL_WIDTH = 3
    _OUT_CHANNELS = 4

    def __init__(self, observation_space: gym.spaces.Dict, features_dim: int = 37):
        universe_size, data_len = observation_space['data'].shape

        # Two un-padded convolutions each shorten the row by (kernel - 1).
        out_width = data_len - 2 * (self._KERNEL_WIDTH - 1)
        if out_width < 1:
            raise ValueError(
                f"'data' rows need at least {2 * (self._KERNEL_WIDTH - 1) + 1} "
                f"columns, got {data_len}"
            )
        weights_dim = int(np.prod(observation_space['weights'].shape))
        actual_dim = self._OUT_CHANNELS * universe_size * out_width + weights_dim

        super().__init__(observation_space, actual_dim)
        self.universe_size = universe_size

        # No explicit .cuda(): the module is left on the default device so it
        # also works on CPU-only machines; Stable-Baselines3 moves the whole
        # policy (including this extractor) to the model's device.
        self.cnn = nn.Sequential(
            nn.Conv2d(1, 2, kernel_size=(1, self._KERNEL_WIDTH)),
            nn.ReLU(),
            nn.Conv2d(2, self._OUT_CHANNELS, kernel_size=(1, self._KERNEL_WIDTH)),
        )

    def forward(self, observations: dict[str, torch.Tensor]) -> torch.Tensor:
        """Return the concatenated CNN features and weights, one row per sample."""
        # Insert a singleton channel axis: (batch, rows, cols) -> (batch, 1, rows, cols).
        conv_out = self.cnn(observations['data'][:, None, :, :])
        flat_conv = conv_out.flatten(start_dim=1)
        flat_weights = observations['weights'].flatten(start_dim=1)
        return torch.cat((flat_conv, flat_weights), dim=1)

In [4]:
# Train DDPG on a single (non-vectorised) toy environment with a step limit.
from stable_baselines3 import DDPG, SAC
from stable_baselines3.common.noise import NormalActionNoise
from gymnasium.wrappers import TimeLimit

# Tunables for this run.
SEED = 42
ACTION_SPACE_SEED = 43
NOISE_SIGMA = 0.02
MAX_EPISODE_STEPS = 1000
REPLAY_BUFFER_SIZE = 10**7
# Effectively "run until interrupted": training is stopped by hand.
TOTAL_TIMESTEPS = 10**10

train_env = PortfolioEnvWithTCost()
# The bare environment never ends an episode; TimeLimit truncates it.
train_env = TimeLimit(train_env, max_episode_steps=MAX_EPISODE_STEPS)

# Set seeds
random.seed(SEED)
np.random.seed(SEED)
train_env.action_space.seed(ACTION_SPACE_SEED)
torch.manual_seed(SEED)

# Size the exploration noise from the environment instead of hard-coding it,
# so it keeps matching the action vector if the action space changes.
n_actions = train_env.action_space.shape[-1]
action_noise = NormalActionNoise(
    mean=np.zeros(n_actions),
    sigma=NOISE_SIGMA * np.ones(n_actions),
)

model = DDPG(
    'MultiInputPolicy',
    train_env,
    buffer_size=REPLAY_BUFFER_SIZE,
    verbose=1,
    policy_kwargs={
        'features_extractor_class': Custom_EIEE_CNN_Extractor,
        # 'net_arch': [50, 50],
    },
    action_noise=action_noise,
)
model.learn(total_timesteps=TOTAL_TIMESTEPS, log_interval=1)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -2.2e+04 |
| time/              |          |
|    episodes        | 1        |
|    fps             | 125      |
|    time_elapsed    | 7        |
|    total_timesteps | 1000     |
| train/             |          |
|    actor_loss      | 27.8     |
|    critic_loss     | 4.42     |
|    learning_rate   | 0.001    |
|    n_updates       | 899      |
---------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 1e+03     |
|    ep_rew_mean     | -1.15e+04 |
| time/              |           |
|    episodes        | 2         |
|    fps             | 125       |
|    time_elapsed    | 15        |
|    total_timesteps | 2000      |
| train/             |           |
|    actor_loss      | 29.9      |
|    criti

KeyboardInterrupt: 