In [1]:
import riskfolio as rf

import matplotlib.pyplot as plt
import numpy as np
import pickle
import random

import pandas as pd

import gym
from gym import spaces

from stable_baselines3 import PPO, A2C, TD3, SAC, DDPG, DQN
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize, VecFrameStack, SubprocVecEnv
from stable_baselines3.common.callbacks import BaseCallback, EvalCallback
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.evaluation import evaluate_policy

import torch

In [2]:
def softmax_normalization(actions):
    numerator = np.exp(actions)
    denominator = np.sum(np.exp(actions))
    softmax_output = numerator/denominator
    return softmax_output

## Definition of gym Environment

In [3]:
# with open('data/sp500_it_components_7y.pickle', 'rb') as f:
#     test_data = pickle.load(f)
# test_data[0]['corrs']

In [66]:
class PortfolioAllocationEnv(gym.Env):
    metadata = {'render.modes': ['human']}

    def __init__(self, data_file,
                 objective = 'max_return',
                 risk_measure = 'MV',
                 observations_range = (0, 1259)):
        """
        Инициализация среды
        """
        super(PortfolioAllocationEnv, self).__init__()

        with open(data_file, 'rb') as f:
            self.data = pickle.load(f)
#             for d in self.data:
#                 d['corrs'] = d['corrs'].iloc[3:, :]

        self.stock_dim = self.data[0]['corrs'].shape[1]

        close_price_changes = {stock['ticker']: stock['data']['Close_change'] for stock in self.data}
        self.close_prices_pct = pd.DataFrame(close_price_changes)

        self.tickers = [stock['ticker'] for stock in self.data]
        self.n_assets = len(self.data)

        self.objective = objective
        self.risk_measure = risk_measure

        self.rewards_memory = None
        self.actions_memory = None

        self.portfolio_value = 0
        self.observations_range = observations_range
        self.current_step = observations_range[0]

        self.weights = np.ones(self.n_assets) / self.n_assets

        self.reward_range = (-10, 10)
        self.action_space = gym.spaces.Box(low=-1, high=1, shape=(self.n_assets,))
        self.observation_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(self.n_assets, self.stock_dim + 1))

    def step(self, action):
        """
        Выполнение одного шага
        """
        # assert self.action_space.contains(action), "Invalid action"

        # Получаем текущее состояние
        state = self._get_state()

        # Вычисляем награду
        reward = self._get_reward(action)

        # Обновляем текущий шаг
        self.current_step += 1

        # Проверяем, достигнут ли конец эпизода
        done = self.current_step >= self.observations_range[1]

        if done:
            print(self.render())

        # Возвращаем состояние, награду и флаг окончания эпизода
        return state, reward, done, {}

    def reset(self):
        """
        Сброс среды в начальное состояние
        """
        self.portfolio_value = 0
        self.rewards_memory = []
        self.current_step = self.observations_range[0]
        self.actions_memory = []
        self.weights = np.ones(self.n_assets) / self.n_assets
        return self._get_state()

    def render(self, mode='human'):
        """
        Вывод информации о среде
        """
        return {
            'min_reward': min(self.rewards_memory),
            'max_reward': max(self.rewards_memory),
            'mean_reward': np.array(self.rewards_memory).mean(),
            'portfolio_value': self.portfolio_value,
            'weights_q0.25': np.quantile(self.weights, 0.25),
            'weights_q0.5': np.quantile(self.weights, 0.5),
            'weights_q0.75': np.quantile(self.weights, 0.75),
            'weights_max': np.max(self.weights),
            'weights_std': self.weights.std()
        }

    def _get_state(self):
        """
        Получение текущего состояния
        """
        values = [stock['corrs'].iloc[self.current_step].values for stock in self.data]
        observation = np.vstack(values)
        observation = np.concatenate([observation, self.weights[:, None]], axis=1).astype(np.float32)
        observation = np.nan_to_num(observation)
        return observation

    def _get_reward(self, action):
        """
        Вычисление награды
        """

        self.weights += action
        self.weights = softmax_normalization(self.weights)

        self.actions_memory.append(action)

        prices_change_hist = self.close_prices_pct.iloc[self.observations_range[0]:self.current_step + 1]
        current_prices = self.close_prices_pct.iloc[self.current_step].values

        portfolio_value_change = current_prices @ self.weights
        self.portfolio_value += portfolio_value_change

        reward = 0

        if self.objective == 'sharpe':
            mu = rf.mean_vector(prices_change_hist, method='hist')
            cov = rf.covar_matrix(prices_change_hist, method='shrunk')
            reward = rf.Sharpe(self.weights, mu, cov, prices_change_hist, rm=self.risk_measure)
        elif self.objective == 'price_change':
            # reward = portfolio_value_change
            reward = portfolio_value_change
        elif self.objective == 'min_risk':
            mu = rf.mean_vector(prices_change_hist, method='hist')
            cov = rf.covar_matrix(prices_change_hist, method='shrunk')
            reward = -rf.Sharpe_Risk(self.weights, cov, mu, rm=self.risk_measure)

        if np.isinf(reward):
            reward = 0

        self.rewards_memory.append(reward)
        # print('act', self.current_step, np.isnan(action).sum(), np.array(self.rewards_memory).mean())

        return reward

In [79]:
# class PortfolioAllocationEnv(gym.Env):
#     metadata = {'render.modes': ['human']}

#     def __init__(self, data_file,
#                  sector = None,
#                  objective = 'max_return',
#                  risk_measure = 'MV',
#                  drop_close = True,
#                  observations_range = (0, 1259)):
#         """
#         Инициализация среды
#         """
#         super(PortfolioAllocationEnv, self).__init__()

#         with open(data_file, 'rb') as f:
#             self.data = pickle.load(f)
#             if sector is not None:
#                 self.data = list(filter(lambda stock: stock['sector'] == sector, self.data))
#             if drop_close:
#                 for stock in self.data:
#                     stock['data'] = stock['data'].drop(['Close'], axis=1)

#         self.stock_dim = self.data[0]['data'].shape[1]

#         close_price_changes = {stock['ticker']: stock['data']['Close_Change'] for stock in self.data}
#         self.close_prices_pct = pd.DataFrame(close_price_changes)

#         self.tickers = [stock['ticker'] for stock in self.data]
#         self.n_assets = len(self.data)

#         self.objective = objective
#         self.risk_measure = risk_measure

#         self.rewards_memory = None
#         self.actions_memory = None

#         self.portfolio_value = 0
#         self.observations_range = observations_range
#         self.current_step = observations_range[0]

#         self.weights = np.ones(self.n_assets) / self.n_assets

#         self.reward_range = (-10, 10)
#         self.action_space = gym.spaces.Box(low=-1, high=1, shape=(self.n_assets,))
#         self.observation_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(self.n_assets, self.stock_dim + 1))

#     def step(self, action):
#         """
#         Выполнение одного шага
#         """
#         # assert self.action_space.contains(action), "Invalid action"

#         # Получаем текущее состояние
#         state = self._get_state()

#         # Вычисляем награду
#         reward = self._get_reward(action)

#         # Обновляем текущий шаг
#         self.current_step += 1

#         # Проверяем, достигнут ли конец эпизода
#         done = self.current_step >= self.observations_range[1]

#         if done:
#             print(self.render())

#         # Возвращаем состояние, награду и флаг окончания эпизода
#         return state, reward, done, {}

#     def reset(self):
#         """
#         Сброс среды в начальное состояние
#         """
#         self.portfolio_value = 0
#         self.rewards_memory = []
#         self.current_step = self.observations_range[0]
#         self.actions_memory = []
#         self.weights = np.ones(self.n_assets) / self.n_assets
#         return self._get_state()

#     def render(self, mode='human'):
#         """
#         Вывод информации о среде
#         """
#         return {
#             'min_reward': min(self.rewards_memory),
#             'max_reward': max(self.rewards_memory),
#             'mean_reward': np.array(self.rewards_memory).mean(),
#             'portfolio_value': self.portfolio_value,
#             'weights_q0.25': np.quantile(self.weights, 0.25),
#             'weights_q0.5': np.quantile(self.weights, 0.5),
#             'weights_q0.75': np.quantile(self.weights, 0.75),
#             'weights_max': np.max(self.weights),
#             'weights_std': self.weights.std()
#         }

#     def _get_state(self):
#         """
#         Получение текущего состояния
#         """
#         values = [stock['data'].iloc[self.current_step].values for stock in self.data]
#         observation = np.vstack(values)
#         observation = np.concatenate([observation, self.weights[:, None]], axis=1).astype(np.float32)
#         observation = np.nan_to_num(observation)
#         return observation

#     def _get_reward(self, action):
#         """
#         Вычисление награды
#         """

#         self.weights += action
#         self.weights = softmax_normalization(self.weights)

#         self.actions_memory.append(action)

#         prices_change_hist = self.close_prices_pct.iloc[self.observations_range[0]:self.current_step + 1]
#         current_prices = self.close_prices_pct.iloc[self.current_step].values

#         portfolio_value_change = current_prices @ self.weights
#         self.portfolio_value += portfolio_value_change

#         reward = 0

#         if self.objective == 'sharpe':
#             mu = rf.mean_vector(prices_change_hist, method='hist')
#             cov = rf.covar_matrix(prices_change_hist, method='shrunk')
#             reward = rf.Sharpe(self.weights, mu, cov, prices_change_hist, rm=self.risk_measure)
#         elif self.objective == 'price_change':
#             # reward = portfolio_value_change
#             reward = portfolio_value_change
#         elif self.objective == 'min_risk':
#             mu = rf.mean_vector(prices_change_hist, method='hist')
#             cov = rf.covar_matrix(prices_change_hist, method='shrunk')
#             reward = -rf.Sharpe_Risk(self.weights, cov, mu, rm=self.risk_measure)

#         if np.isinf(reward):
#             reward = 0

#         self.rewards_memory.append(reward)
#         # print('act', self.current_step, np.isnan(action).sum(), np.array(self.rewards_memory).mean())

#         return reward

In [80]:
train_env = PortfolioAllocationEnv(data_file='data/sp500_re_components_7y.pickle',
#                                    sector='Information Technology',
                                   objective='price_change',
#                                    drop_close=False,
                                   observations_range=(0, 1260))  # 252 x 5 = 1260, 252 x 3 = 756
check_env(train_env, skip_render_check=False)



In [81]:
len(train_env.data[0]['corrs'])

1760

### Vectorization of Env

In [82]:
from stable_baselines3.common.vec_env import VecCheckNan

train_env = DummyVecEnv([lambda: train_env])
train_env = VecNormalize(train_env, norm_obs=True, norm_reward=False)
train_env = VecCheckNan(train_env, raise_exception=True)
# train_env = VecFrameStack(train_env, 15, channels_order='first')

In [83]:
train_env.observation_space.shape, train_env.action_space.shape

((28, 41), (28,))

## stablebaselines Models

In [84]:
from stable_baselines3.ppo import MlpPolicy
from sb3_contrib.ppo_recurrent import MlpLstmPolicy
from sb3_contrib import RecurrentPPO

In [53]:
DDPG?

In [85]:
model = SAC('MlpPolicy', train_env, learning_rate=1e-3, buffer_size=20_000,
            verbose=1, tensorboard_log='logs/',
            device='cuda', batch_size=128,
#             policy_kwargs={'net_arch': [800, 400]}
            )
model.policy

Using cuda device


SACPolicy(
  (actor): Actor(
    (features_extractor): FlattenExtractor(
      (flatten): Flatten(start_dim=1, end_dim=-1)
    )
    (latent_pi): Sequential(
      (0): Linear(in_features=1148, out_features=256, bias=True)
      (1): ReLU()
      (2): Linear(in_features=256, out_features=256, bias=True)
      (3): ReLU()
    )
    (mu): Linear(in_features=256, out_features=28, bias=True)
    (log_std): Linear(in_features=256, out_features=28, bias=True)
  )
  (critic): ContinuousCritic(
    (features_extractor): FlattenExtractor(
      (flatten): Flatten(start_dim=1, end_dim=-1)
    )
    (qf0): Sequential(
      (0): Linear(in_features=1176, out_features=256, bias=True)
      (1): ReLU()
      (2): Linear(in_features=256, out_features=256, bias=True)
      (3): ReLU()
      (4): Linear(in_features=256, out_features=1, bias=True)
    )
    (qf1): Sequential(
      (0): Linear(in_features=1176, out_features=256, bias=True)
      (1): ReLU()
      (2): Linear(in_features=256, out_feature

In [86]:
mean_reward, std_reward = evaluate_policy(model, train_env, n_eval_episodes=1, deterministic=False, warn=False, return_episode_rewards=True)
print(mean_reward, std_reward)

{'min_reward': -0.17888513111263077, 'max_reward': 0.1016405990048536, 'mean_reward': 0.00027419362430891305, 'portfolio_value': 0.3454839666292308, 'weights_q0.25': 0.019981438691910217, 'weights_q0.5': 0.025651049657377967, 'weights_q0.75': 0.0489902893783318, 'weights_max': 0.09459255277172963, 'weights_std': 0.022267354644032733}
[0.3454839686530704] [1260]


### Learn loop

In [87]:
# eval_callback = EvalCallback(eval_env=test_env, best_model_save_path='models/best.stbl',
#                              n_eval_episodes=3, eval_freq=500, render=False, warn=False)
# eval_callback

In [88]:
class MeanRewardCallback(BaseCallback):
    def __init__(self, verbose=0):
        super(MeanRewardCallback, self).__init__(verbose)
        self.render = {}

    def _on_step(self) -> bool:

        if self.training_env.get_attr('rewards_memory')[0]:  # else - end of episode
            # reward = self.training_env.get_attr('rewards_memory')[0][-1]
            self.render = self.training_env.render()
        else:
            self.logger.record("train/mean_reward", self.render['mean_reward'])
            self.logger.record("train/min_reward", self.render['min_reward'])
            self.logger.record("train/max_reward", self.render['max_reward'])
            self.logger.record("train/portfolio_value", self.render['portfolio_value'])

        return True

In [89]:
model_name = 'SAC-mr-re-256-256-corrs-dw'
model.learn(total_timesteps=150_000, progress_bar=False, log_interval=1,
            reset_num_timesteps=False, tb_log_name=model_name,
            callback=MeanRewardCallback())

Logging to logs/SAC-mr-re-256-256-corrs-dw_0
{'min_reward': -0.1788921409979694, 'max_reward': 0.08892297520103949, 'mean_reward': 0.0002711987671513675, 'portfolio_value': 0.3417104466107232, 'weights_q0.25': 0.020148079638662754, 'weights_q0.5': 0.031008298210897442, 'weights_q0.75': 0.05254317331350907, 'weights_max': 0.08901289524707764, 'weights_std': 0.01875007540657821}
---------------------------------
| time/              |          |
|    episodes        | 1        |
|    fps             | 148      |
|    time_elapsed    | 8        |
|    total_timesteps | 1260     |
| train/             |          |
|    actor_loss      | -47.3    |
|    critic_loss     | 0.46     |
|    ent_coef        | 0.315    |
|    ent_coef_loss   | -53.6    |
|    learning_rate   | 0.001    |
|    max_reward      | 0.0889   |
|    mean_reward     | 0.000266 |
|    min_reward      | -0.179   |
|    n_updates       | 1159     |
|    portfolio_value | 0.335    |
---------------------------------
{'min_re

{'min_reward': -0.17885128215343749, 'max_reward': 0.1259980923644234, 'mean_reward': 0.00029960163746818743, 'portfolio_value': 0.3774980632099162, 'weights_q0.25': 0.01502203438845991, 'weights_q0.5': 0.03062525712245362, 'weights_q0.75': 0.05189125820344894, 'weights_max': 0.07333904550270333, 'weights_std': 0.022123288943855696}
---------------------------------
| time/              |          |
|    episodes        | 10       |
|    fps             | 133      |
|    time_elapsed    | 94       |
|    total_timesteps | 12600    |
| train/             |          |
|    actor_loss      | -33.3    |
|    critic_loss     | 0.051    |
|    ent_coef        | 0.000514 |
|    ent_coef_loss   | 5.26     |
|    learning_rate   | 0.001    |
|    max_reward      | 0.126    |
|    mean_reward     | 0.00029  |
|    min_reward      | -0.179   |
|    n_updates       | 12499    |
|    portfolio_value | 0.365    |
---------------------------------
{'min_reward': -0.16383123820120393, 'max_reward': 0.

{'min_reward': -0.17069456722223045, 'max_reward': 0.08590975509671253, 'mean_reward': 0.0003268243721808611, 'portfolio_value': 0.4117987089478838, 'weights_q0.25': 0.0252067170377713, 'weights_q0.5': 0.03415465150326273, 'weights_q0.75': 0.0410142446586077, 'weights_max': 0.06916042662412752, 'weights_std': 0.015457435630642398}
---------------------------------
| time/              |          |
|    episodes        | 19       |
|    fps             | 132      |
|    time_elapsed    | 180      |
|    total_timesteps | 23940    |
| train/             |          |
|    actor_loss      | -17.3    |
|    critic_loss     | 0.011    |
|    ent_coef        | 0.000226 |
|    ent_coef_loss   | -12.8    |
|    learning_rate   | 0.001    |
|    max_reward      | 0.0859   |
|    mean_reward     | 0.000321 |
|    min_reward      | -0.171   |
|    n_updates       | 23839    |
|    portfolio_value | 0.404    |
---------------------------------
{'min_reward': -0.18017314337973067, 'max_reward': 0.10

{'min_reward': -0.17865659519297905, 'max_reward': 0.14562616676973522, 'mean_reward': 0.0003682611643490276, 'portfolio_value': 0.46400906707977524, 'weights_q0.25': 0.02045672453129592, 'weights_q0.5': 0.035595033270397064, 'weights_q0.75': 0.04859479027766392, 'weights_max': 0.06774716648036291, 'weights_std': 0.0174308447847073}
---------------------------------
| time/              |          |
|    episodes        | 28       |
|    fps             | 132      |
|    time_elapsed    | 265      |
|    total_timesteps | 35280    |
| train/             |          |
|    actor_loss      | -9.6     |
|    critic_loss     | 0.000623 |
|    ent_coef        | 7.59e-05 |
|    ent_coef_loss   | -63.4    |
|    learning_rate   | 0.001    |
|    max_reward      | 0.146    |
|    mean_reward     | 0.000359 |
|    min_reward      | -0.179   |
|    n_updates       | 35179    |
|    portfolio_value | 0.452    |
---------------------------------
{'min_reward': -0.1797662494113489, 'max_reward': 0.1

{'min_reward': -0.1847251193201035, 'max_reward': 0.1261287602780347, 'mean_reward': 0.0004240401513831691, 'portfolio_value': 0.5342905907427936, 'weights_q0.25': 0.0239575994655211, 'weights_q0.5': 0.03829726706397728, 'weights_q0.75': 0.04739665038164188, 'weights_max': 0.061107547998346584, 'weights_std': 0.015168599417948167}
---------------------------------
| time/              |          |
|    episodes        | 37       |
|    fps             | 132      |
|    time_elapsed    | 351      |
|    total_timesteps | 46620    |
| train/             |          |
|    actor_loss      | -5.2     |
|    critic_loss     | 0.000118 |
|    ent_coef        | 4.19e-05 |
|    ent_coef_loss   | -43.1    |
|    learning_rate   | 0.001    |
|    max_reward      | 0.126    |
|    mean_reward     | 0.000419 |
|    min_reward      | -0.185   |
|    n_updates       | 46519    |
|    portfolio_value | 0.528    |
---------------------------------
{'min_reward': -0.17998635435069274, 'max_reward': 0.14

{'min_reward': -0.1736821187467131, 'max_reward': 0.1811198535250156, 'mean_reward': 0.000638724708357461, 'portfolio_value': 0.8047931325304012, 'weights_q0.25': 0.023594662124204108, 'weights_q0.5': 0.034944964001319925, 'weights_q0.75': 0.049388422691268356, 'weights_max': 0.06409936268122089, 'weights_std': 0.01626506933720104}
---------------------------------
| time/              |          |
|    episodes        | 46       |
|    fps             | 132      |
|    time_elapsed    | 436      |
|    total_timesteps | 57960    |
| train/             |          |
|    actor_loss      | -2.51    |
|    critic_loss     | 0.000212 |
|    ent_coef        | 2.28e-05 |
|    ent_coef_loss   | 42.7     |
|    learning_rate   | 0.001    |
|    max_reward      | 0.181    |
|    mean_reward     | 0.000634 |
|    min_reward      | -0.174   |
|    n_updates       | 57859    |
|    portfolio_value | 0.798    |
---------------------------------
{'min_reward': -0.17473884635974507, 'max_reward': 0.1

{'min_reward': -0.16201415375429612, 'max_reward': 0.17985844642483345, 'mean_reward': 0.0009347836474341463, 'portfolio_value': 1.177827395767024, 'weights_q0.25': 0.020355520718922278, 'weights_q0.5': 0.03408151922220193, 'weights_q0.75': 0.04620924343830764, 'weights_max': 0.06663302462016972, 'weights_std': 0.017690262268660144}
---------------------------------
| time/              |          |
|    episodes        | 55       |
|    fps             | 133      |
|    time_elapsed    | 520      |
|    total_timesteps | 69300    |
| train/             |          |
|    actor_loss      | -1.47    |
|    critic_loss     | 3.33e-05 |
|    ent_coef        | 1.48e-05 |
|    ent_coef_loss   | -12.1    |
|    learning_rate   | 0.001    |
|    max_reward      | 0.18     |
|    mean_reward     | 0.000929 |
|    min_reward      | -0.162   |
|    n_updates       | 69199    |
|    portfolio_value | 1.17     |
---------------------------------
{'min_reward': -0.16007123496801204, 'max_reward': 0.

{'min_reward': -0.16129926027692598, 'max_reward': 0.19492403759206606, 'mean_reward': 0.0011769698812176164, 'portfolio_value': 1.4829820503341984, 'weights_q0.25': 0.016827653631637463, 'weights_q0.5': 0.03194609494292505, 'weights_q0.75': 0.05794576045036908, 'weights_max': 0.06333424966795773, 'weights_std': 0.02092031922856697}
---------------------------------
| time/              |          |
|    episodes        | 64       |
|    fps             | 134      |
|    time_elapsed    | 600      |
|    total_timesteps | 80640    |
| train/             |          |
|    actor_loss      | -0.755   |
|    critic_loss     | 8.71e-05 |
|    ent_coef        | 7.05e-06 |
|    ent_coef_loss   | 94.7     |
|    learning_rate   | 0.001    |
|    max_reward      | 0.195    |
|    mean_reward     | 0.00117  |
|    min_reward      | -0.161   |
|    n_updates       | 80539    |
|    portfolio_value | 1.48     |
---------------------------------
{'min_reward': -0.16740609591364725, 'max_reward': 0.

{'min_reward': -0.15903538141239526, 'max_reward': 0.19465908264178505, 'mean_reward': 0.0017017501730014304, 'portfolio_value': 2.1442052179817996, 'weights_q0.25': 0.012348574260061033, 'weights_q0.5': 0.03082766224543623, 'weights_q0.75': 0.06455330883617436, 'weights_max': 0.07084642086029257, 'weights_std': 0.02338975256960003}
---------------------------------
| time/              |          |
|    episodes        | 73       |
|    fps             | 134      |
|    time_elapsed    | 682      |
|    total_timesteps | 91980    |
| train/             |          |
|    actor_loss      | -0.481   |
|    critic_loss     | 4.53e-06 |
|    ent_coef        | 7.07e-06 |
|    ent_coef_loss   | 25.2     |
|    learning_rate   | 0.001    |
|    max_reward      | 0.195    |
|    mean_reward     | 0.0017   |
|    min_reward      | -0.159   |
|    n_updates       | 91879    |
|    portfolio_value | 2.14     |
---------------------------------
{'min_reward': -0.15864203055920484, 'max_reward': 0.

{'min_reward': -0.16329311547622416, 'max_reward': 0.20892302497041898, 'mean_reward': 0.0020550741160210444, 'portfolio_value': 2.5893933861865137, 'weights_q0.25': 0.00940066271644853, 'weights_q0.5': 0.0375506205641063, 'weights_q0.75': 0.060620683507172056, 'weights_max': 0.06456678381647428, 'weights_std': 0.02386728075981224}
---------------------------------
| time/              |          |
|    episodes        | 82       |
|    fps             | 134      |
|    time_elapsed    | 765      |
|    total_timesteps | 103320   |
| train/             |          |
|    actor_loss      | -0.305   |
|    critic_loss     | 8.48e-06 |
|    ent_coef        | 8.04e-06 |
|    ent_coef_loss   | -8.34    |
|    learning_rate   | 0.001    |
|    max_reward      | 0.209    |
|    mean_reward     | 0.00205  |
|    min_reward      | -0.163   |
|    n_updates       | 103219   |
|    portfolio_value | 2.59     |
---------------------------------
{'min_reward': -0.1526045363690866, 'max_reward': 0.20

{'min_reward': -0.148852155155832, 'max_reward': 0.20740296681401243, 'mean_reward': 0.002624007690291726, 'portfolio_value': 3.306249689767573, 'weights_q0.25': 0.010215166089351965, 'weights_q0.5': 0.02402263192654776, 'weights_q0.75': 0.06394093514334284, 'weights_max': 0.06998422799577392, 'weights_std': 0.026101145032296155}
---------------------------------
| time/              |          |
|    episodes        | 91       |
|    fps             | 134      |
|    time_elapsed    | 850      |
|    total_timesteps | 114660   |
| train/             |          |
|    actor_loss      | -0.246   |
|    critic_loss     | 4.81e-05 |
|    ent_coef        | 9.08e-06 |
|    ent_coef_loss   | -13.8    |
|    learning_rate   | 0.001    |
|    max_reward      | 0.207    |
|    mean_reward     | 0.00263  |
|    min_reward      | -0.149   |
|    n_updates       | 114559   |
|    portfolio_value | 3.31     |
---------------------------------
{'min_reward': -0.15640851593448632, 'max_reward': 0.206

{'min_reward': -0.15201122424097915, 'max_reward': 0.20641783033626843, 'mean_reward': 0.0027854052228319685, 'portfolio_value': 3.509610580768279, 'weights_q0.25': 0.009930766441986975, 'weights_q0.5': 0.025923769449914737, 'weights_q0.75': 0.06548775319090627, 'weights_max': 0.07031087003870366, 'weights_std': 0.026267862974110205}
---------------------------------
| time/              |          |
|    episodes        | 100      |
|    fps             | 134      |
|    time_elapsed    | 934      |
|    total_timesteps | 126000   |
| train/             |          |
|    actor_loss      | -0.227   |
|    critic_loss     | 5.56e-06 |
|    ent_coef        | 1.12e-05 |
|    ent_coef_loss   | 21.6     |
|    learning_rate   | 0.001    |
|    max_reward      | 0.206    |
|    mean_reward     | 0.00279  |
|    min_reward      | -0.152   |
|    n_updates       | 125899   |
|    portfolio_value | 3.51     |
---------------------------------
{'min_reward': -0.1520013290900144, 'max_reward': 0.

{'min_reward': -0.15723847207131708, 'max_reward': 0.20682796125848804, 'mean_reward': 0.0030969816821295195, 'portfolio_value': 3.9021969194831927, 'weights_q0.25': 0.013357503530440865, 'weights_q0.5': 0.025289437560607888, 'weights_q0.75': 0.0592321564441895, 'weights_max': 0.07778071011439174, 'weights_std': 0.024697629020929583}
---------------------------------
| time/              |          |
|    episodes        | 109      |
|    fps             | 134      |
|    time_elapsed    | 1018     |
|    total_timesteps | 137340   |
| train/             |          |
|    actor_loss      | -0.247   |
|    critic_loss     | 5.31e-06 |
|    ent_coef        | 1.11e-05 |
|    ent_coef_loss   | -1.17    |
|    learning_rate   | 0.001    |
|    max_reward      | 0.207    |
|    mean_reward     | 0.0031   |
|    min_reward      | -0.157   |
|    n_updates       | 137239   |
|    portfolio_value | 3.9      |
---------------------------------
{'min_reward': -0.1568534224981808, 'max_reward': 0.

{'min_reward': -0.15143566937644126, 'max_reward': 0.20675489053806198, 'mean_reward': 0.00328807773730698, 'portfolio_value': 4.142977949006797, 'weights_q0.25': 0.010930639477993182, 'weights_q0.5': 0.025832406966625525, 'weights_q0.75': 0.0634993178762739, 'weights_max': 0.07523004119948225, 'weights_std': 0.024923456300297974}
---------------------------------
| time/              |          |
|    episodes        | 118      |
|    fps             | 135      |
|    time_elapsed    | 1099     |
|    total_timesteps | 148680   |
| train/             |          |
|    actor_loss      | -0.272   |
|    critic_loss     | 5.43e-06 |
|    ent_coef        | 1.1e-05  |
|    ent_coef_loss   | -15.1    |
|    learning_rate   | 0.001    |
|    max_reward      | 0.207    |
|    mean_reward     | 0.00329  |
|    min_reward      | -0.151   |
|    n_updates       | 148579   |
|    portfolio_value | 4.15     |
---------------------------------
{'min_reward': -0.15388075259987127, 'max_reward': 0.20

<stable_baselines3.sac.sac.SAC at 0x7f149123cd00>

#### Saving model and running mean from env

In [90]:
model.save('models/' + model_name)
with open('models/' + model_name + '_obs_rms.pickle', 'wb') as handle:
    pickle.dump(train_env.obs_rms, handle)

## Evaluation

In [91]:
mean_reward, std_reward = evaluate_policy(model, train_env, n_eval_episodes=5, deterministic=False, warn=False, return_episode_rewards=False)
print(mean_reward, std_reward)

{'min_reward': -0.1551756418967425, 'max_reward': 0.2051501027367493, 'mean_reward': 0.0031182704246739334, 'portfolio_value': 3.9290207350891557, 'weights_q0.25': 0.012947083196902343, 'weights_q0.5': 0.02831708697868826, 'weights_q0.75': 0.06261019890797306, 'weights_max': 0.07241900651081343, 'weights_std': 0.023459503237995828}
{'min_reward': -0.15261769454163304, 'max_reward': 0.2067505208626652, 'mean_reward': 0.0031151472645706093, 'portfolio_value': 3.9250855533589637, 'weights_q0.25': 0.011957855735723659, 'weights_q0.5': 0.023034958578173094, 'weights_q0.75': 0.06179814493697238, 'weights_max': 0.06960267782515266, 'weights_std': 0.024291828874461626}
{'min_reward': -0.1558077356194606, 'max_reward': 0.2057931701088344, 'mean_reward': 0.003103618810809518, 'portfolio_value': 3.9105597016199978, 'weights_q0.25': 0.013518319494265381, 'weights_q0.5': 0.02403929192007034, 'weights_q0.75': 0.0625736048484683, 'weights_max': 0.07221934615278286, 'weights_std': 0.023597901522330263

In [92]:
train_env.venv.venv.envs[0].__dict__.keys()

dict_keys(['data', 'stock_dim', 'close_prices_pct', 'tickers', 'n_assets', 'objective', 'risk_measure', 'rewards_memory', 'actions_memory', 'portfolio_value', 'observations_range', 'current_step', 'weights', 'reward_range', 'action_space', 'observation_space'])

In [93]:
train_env.training = False
train_env.venv.venv.envs[0].observations_range = (0, 1760) # change to use the test period

In [94]:
import copy

obs = train_env.reset()
weights_states = [train_env.venv.venv.envs[0].weights]
portfolio_values = [0]
while True:
    action, _states = model.predict(obs)
    obs, rewards, done, info = train_env.step(action)
    if done:
        break
    else:
        weights_states.append(copy.deepcopy(train_env.venv.venv.envs[0].weights))
        portfolio_values.append(train_env.venv.venv.envs[0].portfolio_value)
        print(train_env.render())
len(portfolio_values), len(weights_states)

{'min_reward': -0.004746610762858191, 'max_reward': -0.004746610762858191, 'mean_reward': -0.004746610762858191, 'portfolio_value': -0.004746610762858191, 'weights_q0.25': 0.012425719301477869, 'weights_q0.5': 0.028633291510034624, 'weights_q0.75': 0.06033297567917744, 'weights_max': 0.0743449346773447, 'weights_std': 0.02318512511726389}
{'min_reward': -0.021275502604930344, 'max_reward': -0.004746610762858191, 'mean_reward': -0.013011056683894268, 'portfolio_value': -0.026022113367788535, 'weights_q0.25': 0.013377119957668228, 'weights_q0.5': 0.031773963374719216, 'weights_q0.75': 0.05709768210254099, 'weights_max': 0.06952922760609104, 'weights_std': 0.022144421160463296}
{'min_reward': -0.021275502604930344, 'max_reward': -0.004746610762858191, 'mean_reward': -0.011356544208212463, 'portfolio_value': -0.034069632624637386, 'weights_q0.25': 0.012924303139651544, 'weights_q0.5': 0.029190664871458286, 'weights_q0.75': 0.06026878612609565, 'weights_max': 0.07495704238045953, 'weights_s

{'min_reward': -0.02742439463692864, 'max_reward': 0.03021282574113266, 'mean_reward': 0.002840218400381273, 'portfolio_value': 0.19881528802668913, 'weights_q0.25': 0.009735467922494575, 'weights_q0.5': 0.0339060874691871, 'weights_q0.75': 0.061292301205141134, 'weights_max': 0.06608316991249154, 'weights_std': 0.02445705229029152}
{'min_reward': -0.02742439463692864, 'max_reward': 0.03021282574113266, 'mean_reward': 0.002898392869942885, 'portfolio_value': 0.20578589376594483, 'weights_q0.25': 0.013803453327223665, 'weights_q0.5': 0.01516560655750862, 'weights_q0.75': 0.05842922039813853, 'weights_max': 0.09337310120878424, 'weights_std': 0.02832481801502476}
{'min_reward': -0.02742439463692864, 'max_reward': 0.03021282574113266, 'mean_reward': 0.0028748182885622205, 'portfolio_value': 0.20698691677647987, 'weights_q0.25': 0.010330511921364458, 'weights_q0.5': 0.02502540362918424, 'weights_q0.75': 0.06645911211235275, 'weights_max': 0.06917215646639038, 'weights_std': 0.0253495441760

{'min_reward': -0.02742439463692864, 'max_reward': 0.03021282574113266, 'mean_reward': 0.003391934218139799, 'portfolio_value': 0.4341675799218941, 'weights_q0.25': 0.015609044222964418, 'weights_q0.5': 0.016667422883993543, 'weights_q0.75': 0.02486776577886215, 'weights_max': 0.12194339313739927, 'weights_std': 0.036939187710260175}
{'min_reward': -0.02742439463692864, 'max_reward': 0.03021282574113266, 'mean_reward': 0.0034345555242686083, 'portfolio_value': 0.4430576626306503, 'weights_q0.25': 0.014841441995471813, 'weights_q0.5': 0.017684388525860732, 'weights_q0.75': 0.039980016793115795, 'weights_max': 0.11053507943879695, 'weights_std': 0.03229491671610776}
{'min_reward': -0.02742439463692864, 'max_reward': 0.03021282574113266, 'mean_reward': 0.003471291908605007, 'portfolio_value': 0.4512679481186507, 'weights_q0.25': 0.015199403846139409, 'weights_q0.5': 0.016137013948930995, 'weights_q0.75': 0.03217080446132973, 'weights_max': 0.11884460883855893, 'weights_std': 0.03795392024

{'min_reward': -0.03779753005124128, 'max_reward': 0.03021282574113266, 'mean_reward': 0.0024547132811090803, 'portfolio_value': 0.486033229659598, 'weights_q0.25': 0.013634260142504597, 'weights_q0.5': 0.021619545412943805, 'weights_q0.75': 0.05338338072262827, 'weights_max': 0.0867497003342449, 'weights_std': 0.026236052685693134}
{'min_reward': -0.03779753005124128, 'max_reward': 0.03021282574113266, 'mean_reward': 0.0024710344155239655, 'portfolio_value': 0.4917358486892693, 'weights_q0.25': 0.0138261119851038, 'weights_q0.5': 0.022761098607861695, 'weights_q0.75': 0.04950371996540579, 'weights_max': 0.09242541508657408, 'weights_std': 0.027516228773693096}
{'min_reward': -0.03779753005124128, 'max_reward': 0.03021282574113266, 'mean_reward': 0.002491734908549388, 'portfolio_value': 0.49834698170987773, 'weights_q0.25': 0.013942028599395544, 'weights_q0.5': 0.019022172910399548, 'weights_q0.75': 0.05291444440805548, 'weights_max': 0.09362578012732083, 'weights_std': 0.0287930311838

{'min_reward': -0.03779753005124128, 'max_reward': 0.03021282574113266, 'mean_reward': 0.0024950513413680496, 'portfolio_value': 0.6387331433902206, 'weights_q0.25': 0.012111720273221991, 'weights_q0.5': 0.013965906800003168, 'weights_q0.75': 0.06726447363376134, 'weights_max': 0.09124373902796619, 'weights_std': 0.030910107292910417}
{'min_reward': -0.03779753005124128, 'max_reward': 0.03021282574113266, 'mean_reward': 0.0024925047078697463, 'portfolio_value': 0.6405737099225247, 'weights_q0.25': 0.012759861743216562, 'weights_q0.5': 0.01607133521256751, 'weights_q0.75': 0.07181560905750992, 'weights_max': 0.09081903801737456, 'weights_std': 0.030528636405699066}
{'min_reward': -0.03779753005124128, 'max_reward': 0.03021282574113266, 'mean_reward': 0.002507970992158203, 'portfolio_value': 0.6470565159768162, 'weights_q0.25': 0.012119863927937005, 'weights_q0.5': 0.019673877552485106, 'weights_q0.75': 0.06171339819513307, 'weights_max': 0.08186515542491504, 'weights_std': 0.02684061052

{'min_reward': -0.03779753005124128, 'max_reward': 0.03021282574113266, 'mean_reward': 0.0025181833548706054, 'portfolio_value': 0.8209277736878176, 'weights_q0.25': 0.011216800726713314, 'weights_q0.5': 0.02110485962294477, 'weights_q0.75': 0.06496878362293784, 'weights_max': 0.07909959330081918, 'weights_std': 0.027450919876131255}
{'min_reward': -0.03779753005124128, 'max_reward': 0.03021282574113266, 'mean_reward': 0.002495757112438362, 'portfolio_value': 0.8161125757673446, 'weights_q0.25': 0.012560641751745144, 'weights_q0.5': 0.017914279872284115, 'weights_q0.75': 0.062412485421090715, 'weights_max': 0.0855374899255065, 'weights_std': 0.02833768477109727}
{'min_reward': -0.03779753005124128, 'max_reward': 0.03021282574113266, 'mean_reward': 0.002507757021156249, 'portfolio_value': 0.8225443029392498, 'weights_q0.25': 0.01500794052798473, 'weights_q0.5': 0.021905264855100845, 'weights_q0.75': 0.059936109223973515, 'weights_max': 0.08524268771984105, 'weights_std': 0.0256675607644

{'min_reward': -0.03779753005124128, 'max_reward': 0.03021282574113266, 'mean_reward': 0.0024685608806976223, 'portfolio_value': 0.9479273781878872, 'weights_q0.25': 0.010137848352740642, 'weights_q0.5': 0.022571307874473884, 'weights_q0.75': 0.059937456622914276, 'weights_max': 0.07631485829308972, 'weights_std': 0.027246742500310114}
{'min_reward': -0.03779753005124128, 'max_reward': 0.03021282574113266, 'mean_reward': 0.002478383873517109, 'portfolio_value': 0.9541777913040871, 'weights_q0.25': 0.009611418749910415, 'weights_q0.5': 0.023148141474930155, 'weights_q0.75': 0.06558280450765405, 'weights_max': 0.07002199423818443, 'weights_std': 0.02619034252776393}
{'min_reward': -0.03779753005124128, 'max_reward': 0.03021282574113266, 'mean_reward': 0.0024760704893015567, 'portfolio_value': 0.955763208870401, 'weights_q0.25': 0.01646997773514423, 'weights_q0.5': 0.026635228310084212, 'weights_q0.75': 0.05662867303524301, 'weights_max': 0.07470347612202755, 'weights_std': 0.023140919411

{'min_reward': -0.03779753005124128, 'max_reward': 0.03021282574113266, 'mean_reward': 0.002484094082990814, 'portfolio_value': 1.1277787136778297, 'weights_q0.25': 0.008317570111712178, 'weights_q0.5': 0.05397836169486955, 'weights_q0.75': 0.05554230878053267, 'weights_max': 0.05906576476613072, 'weights_std': 0.022469919239277734}
{'min_reward': -0.03779753005124128, 'max_reward': 0.03021282574113266, 'mean_reward': 0.002468012722022796, 'portfolio_value': 1.1229457885203722, 'weights_q0.25': 0.011923766923451045, 'weights_q0.5': 0.01584536589847846, 'weights_q0.75': 0.07075612929374502, 'weights_max': 0.07928295865828495, 'weights_std': 0.027667234080892986}
{'min_reward': -0.03779753005124128, 'max_reward': 0.03021282574113266, 'mean_reward': 0.002457844441881177, 'portfolio_value': 1.1207770654978166, 'weights_q0.25': 0.018899626304921098, 'weights_q0.5': 0.03920110488156018, 'weights_q0.75': 0.05109447636446006, 'weights_max': 0.058372159198506635, 'weights_std': 0.01726924228221

{'min_reward': -0.03779753005124128, 'max_reward': 0.03021282574113266, 'mean_reward': 0.002365862439508078, 'portfolio_value': 1.211321569028136, 'weights_q0.25': 0.010624386519777172, 'weights_q0.5': 0.023097388176394715, 'weights_q0.75': 0.0684121868756217, 'weights_max': 0.07884507772827022, 'weights_std': 0.027867516430067275}
{'min_reward': -0.03779753005124128, 'max_reward': 0.03021282574113266, 'mean_reward': 0.002346998004350454, 'portfolio_value': 1.2040099762317826, 'weights_q0.25': 0.012352023154666346, 'weights_q0.5': 0.0152388843650869, 'weights_q0.75': 0.06422067112459598, 'weights_max': 0.09162725331558755, 'weights_std': 0.030531927072586996}
{'min_reward': -0.03779753005124128, 'max_reward': 0.03021282574113266, 'mean_reward': 0.002359174071541083, 'portfolio_value': 1.2126154727721163, 'weights_q0.25': 0.013032493409092332, 'weights_q0.5': 0.02609799342221214, 'weights_q0.75': 0.04698800738961313, 'weights_max': 0.09032199213622781, 'weights_std': 0.02764504238311777

{'min_reward': -0.03779753005124128, 'max_reward': 0.03021282574113266, 'mean_reward': 0.0023059726930694086, 'portfolio_value': 1.3420761073663965, 'weights_q0.25': 0.014071727622787784, 'weights_q0.5': 0.015350666599534674, 'weights_q0.75': 0.05982829888326803, 'weights_max': 0.10709021753098975, 'weights_std': 0.032237761435098065}
{'min_reward': -0.03779753005124128, 'max_reward': 0.03021282574113266, 'mean_reward': 0.0022950238958624226, 'portfolio_value': 1.3379989312877931, 'weights_q0.25': 0.019398799732201353, 'weights_q0.5': 0.021962202768431714, 'weights_q0.75': 0.02875055743819503, 'weights_max': 0.12414046328421413, 'weights_std': 0.03036591174849346}
{'min_reward': -0.03779753005124128, 'max_reward': 0.03021282574113266, 'mean_reward': 0.002306324467172547, 'portfolio_value': 1.3468934888287682, 'weights_q0.25': 0.02480946365988889, 'weights_q0.5': 0.025931713502626512, 'weights_q0.75': 0.029897440082732304, 'weights_max': 0.14722649924144893, 'weights_std': 0.02792091770

{'min_reward': -0.03779753005124128, 'max_reward': 0.03021282574113266, 'mean_reward': 0.00236280403537619, 'portfolio_value': 1.5121945826407621, 'weights_q0.25': 0.012017617832831055, 'weights_q0.5': 0.017959572769089674, 'weights_q0.75': 0.06779089663583554, 'weights_max': 0.08367002149074976, 'weights_std': 0.027662268915284716}
{'min_reward': -0.03779753005124128, 'max_reward': 0.03021282574113266, 'mean_reward': 0.002359359109867571, 'portfolio_value': 1.5123491894251135, 'weights_q0.25': 0.01439199469423733, 'weights_q0.5': 0.015978142044141053, 'weights_q0.75': 0.035199196200736846, 'weights_max': 0.10455853104284724, 'weights_std': 0.034267151796692034}
{'min_reward': -0.03779753005124128, 'max_reward': 0.03021282574113266, 'mean_reward': 0.0023511994515381905, 'portfolio_value': 1.5094700478875187, 'weights_q0.25': 0.01165704464133974, 'weights_q0.5': 0.02479704038464487, 'weights_q0.75': 0.05898279686248428, 'weights_max': 0.08117148487682567, 'weights_std': 0.02541572680144

{'min_reward': -0.03779753005124128, 'max_reward': 0.03021282574113266, 'mean_reward': 0.0022722210494260124, 'portfolio_value': 1.613276945092469, 'weights_q0.25': 0.013531677228379186, 'weights_q0.5': 0.017497990403807603, 'weights_q0.75': 0.05928940873652777, 'weights_max': 0.09603457812666139, 'weights_std': 0.028645210568815076}
{'min_reward': -0.03779753005124128, 'max_reward': 0.03021282574113266, 'mean_reward': 0.002305266288231994, 'portfolio_value': 1.639044330932948, 'weights_q0.25': 0.015645590029665535, 'weights_q0.5': 0.016388092493157282, 'weights_q0.75': 0.019766716020998977, 'weights_max': 0.1170867692756636, 'weights_std': 0.03704120977624102}
{'min_reward': -0.03779753005124128, 'max_reward': 0.03021282574113266, 'mean_reward': 0.002296515256420892, 'portfolio_value': 1.6351188625716757, 'weights_q0.25': 0.01652494420334863, 'weights_q0.5': 0.017754230834860205, 'weights_q0.75': 0.020928937367780676, 'weights_max': 0.13218783617032814, 'weights_std': 0.03684405129349

{'min_reward': -0.03779753005124128, 'max_reward': 0.03937106781691673, 'mean_reward': 0.0022883663434006517, 'portfolio_value': 1.7551769853883006, 'weights_q0.25': 0.009757988373864691, 'weights_q0.5': 0.021262483321638485, 'weights_q0.75': 0.06175765312736644, 'weights_max': 0.06607363446410723, 'weights_std': 0.025598013364199812}
{'min_reward': -0.03779753005124128, 'max_reward': 0.03937106781691673, 'mean_reward': 0.002290139868109386, 'portfolio_value': 1.758827418708009, 'weights_q0.25': 0.008974878057554397, 'weights_q0.5': 0.038014251168625245, 'weights_q0.75': 0.058991560286113874, 'weights_max': 0.061168232349488004, 'weights_std': 0.022486191293966094}
{'min_reward': -0.03779753005124128, 'max_reward': 0.03937106781691673, 'mean_reward': 0.0023031418728674024, 'portfolio_value': 1.7711161002350329, 'weights_q0.25': 0.010978030690397404, 'weights_q0.5': 0.0362679544327459, 'weights_q0.75': 0.061161111042190026, 'weights_max': 0.06480988412350928, 'weights_std': 0.0230184389

{'min_reward': -0.03779753005124128, 'max_reward': 0.03937106781691673, 'mean_reward': 0.0023606720991249277, 'portfolio_value': 1.975882546967565, 'weights_q0.25': 0.015136946896888607, 'weights_q0.5': 0.01701330569377104, 'weights_q0.75': 0.048933207476552235, 'weights_max': 0.11296451407366116, 'weights_std': 0.03276669054214021}
{'min_reward': -0.03779753005124128, 'max_reward': 0.03937106781691673, 'mean_reward': 0.002353378575083966, 'portfolio_value': 1.9721312459203637, 'weights_q0.25': 0.013200073989800148, 'weights_q0.5': 0.020175094540809915, 'weights_q0.75': 0.061572703094747674, 'weights_max': 0.08792984116719807, 'weights_std': 0.027443818401116336}
{'min_reward': -0.03779753005124128, 'max_reward': 0.03937106781691673, 'mean_reward': 0.0023305486907554056, 'portfolio_value': 1.9553303515437856, 'weights_q0.25': 0.01831464545518396, 'weights_q0.5': 0.021185892781780916, 'weights_q0.75': 0.039873542267227356, 'weights_max': 0.12126194305069078, 'weights_std': 0.03075571404

{'min_reward': -0.03779753005124128, 'max_reward': 0.03937106781691673, 'mean_reward': 0.002355419910038135, 'portfolio_value': 2.103389979664054, 'weights_q0.25': 0.016133938566030716, 'weights_q0.5': 0.022530461272932123, 'weights_q0.75': 0.048434192202613746, 'weights_max': 0.08850044643644527, 'weights_std': 0.025247438760415172}
{'min_reward': -0.03779753005124128, 'max_reward': 0.03937106781691673, 'mean_reward': 0.0023493074394306057, 'portfolio_value': 2.100280850850961, 'weights_q0.25': 0.014100639080291015, 'weights_q0.5': 0.023738596567957487, 'weights_q0.75': 0.05699710543220796, 'weights_max': 0.07711032711101068, 'weights_std': 0.02305212080576337}
{'min_reward': -0.03779753005124128, 'max_reward': 0.03937106781691673, 'mean_reward': 0.0023501061805862337, 'portfolio_value': 2.1033450316246785, 'weights_q0.25': 0.016383750610406273, 'weights_q0.5': 0.02065157641091931, 'weights_q0.75': 0.04080108665599535, 'weights_max': 0.10230248400098996, 'weights_std': 0.0288578099803

{'min_reward': -0.03779753005124128, 'max_reward': 0.03937106781691673, 'mean_reward': 0.0023937975684582857, 'portfolio_value': 2.302833260856869, 'weights_q0.25': 0.013509643974479634, 'weights_q0.5': 0.019116597544060435, 'weights_q0.75': 0.04912401749021151, 'weights_max': 0.08747829028388249, 'weights_std': 0.02732901255327852}
{'min_reward': -0.03779753005124128, 'max_reward': 0.03937106781691673, 'mean_reward': 0.002387495608118661, 'portfolio_value': 2.299158270618269, 'weights_q0.25': 0.012008209984090957, 'weights_q0.5': 0.024896290901415187, 'weights_q0.75': 0.049615586152660464, 'weights_max': 0.08502637427756289, 'weights_std': 0.025174772155809846}
{'min_reward': -0.03779753005124128, 'max_reward': 0.03937106781691673, 'mean_reward': 0.002392003707653344, 'portfolio_value': 2.305891574177822, 'weights_q0.25': 0.014018992707647358, 'weights_q0.5': 0.015203595417554939, 'weights_q0.75': 0.056214024198003185, 'weights_max': 0.10598757074953735, 'weights_std': 0.0333980485189

{'min_reward': -0.03779753005124128, 'max_reward': 0.03937106781691673, 'mean_reward': 0.0023318240423037093, 'portfolio_value': 2.37612869910748, 'weights_q0.25': 0.012220295148798337, 'weights_q0.5': 0.03377210775484664, 'weights_q0.75': 0.0559136337795632, 'weights_max': 0.06434736102698067, 'weights_std': 0.02079719156150985}
{'min_reward': -0.03779753005124128, 'max_reward': 0.03937106781691673, 'mean_reward': 0.0023278124721923853, 'portfolio_value': 2.3743687216362326, 'weights_q0.25': 0.009667057585442177, 'weights_q0.5': 0.044659917642381496, 'weights_q0.75': 0.05585219626843166, 'weights_max': 0.059694859339648604, 'weights_std': 0.0213172446497137}
{'min_reward': -0.03779753005124128, 'max_reward': 0.03937106781691673, 'mean_reward': 0.0023232275789845094, 'portfolio_value': 2.3720153581431838, 'weights_q0.25': 0.013718982435250549, 'weights_q0.5': 0.02768072078303238, 'weights_q0.75': 0.059456642724444085, 'weights_max': 0.06598004011151709, 'weights_std': 0.022176541558371

{'min_reward': -0.15280873134165546, 'max_reward': 0.11525351541062456, 'mean_reward': 0.0026080652006337645, 'portfolio_value': 2.8349668730889017, 'weights_q0.25': 0.013311370027810345, 'weights_q0.5': 0.023015925723911337, 'weights_q0.75': 0.04939444367924961, 'weights_max': 0.10220080556242304, 'weights_std': 0.028096785549689998}
{'min_reward': -0.15280873134165546, 'max_reward': 0.11525351541062456, 'mean_reward': 0.00257741017243322, 'portfolio_value': 2.804222267607343, 'weights_q0.25': 0.011427024672046199, 'weights_q0.5': 0.017802645461226004, 'weights_q0.75': 0.05932013077851868, 'weights_max': 0.08909031871460615, 'weights_std': 0.029591859235361454}
{'min_reward': -0.15280873134165546, 'max_reward': 0.11525351541062456, 'mean_reward': 0.0025747331642938897, 'portfolio_value': 2.8038844159160456, 'weights_q0.25': 0.012535445390429452, 'weights_q0.5': 0.018657350275983073, 'weights_q0.75': 0.06818963683523555, 'weights_max': 0.09451856360576967, 'weights_std': 0.030011347885

{'min_reward': -0.15280873134165546, 'max_reward': 0.11525351541062456, 'mean_reward': 0.002864770667031315, 'portfolio_value': 3.2772976430838248, 'weights_q0.25': 0.011341673355829078, 'weights_q0.5': 0.012560526849126865, 'weights_q0.75': 0.07183307869817895, 'weights_max': 0.08806370806158834, 'weights_std': 0.032258834866698935}
{'min_reward': -0.15280873134165546, 'max_reward': 0.11525351541062456, 'mean_reward': 0.00285910550595517, 'portfolio_value': 3.27367580431867, 'weights_q0.25': 0.011773881037178632, 'weights_q0.5': 0.016922428996843993, 'weights_q0.75': 0.07088434547148591, 'weights_max': 0.07401515605662573, 'weights_std': 0.02737524619506551}
{'min_reward': -0.15280873134165546, 'max_reward': 0.11525351541062456, 'mean_reward': 0.002852697666481955, 'portfolio_value': 3.2691915257883206, 'weights_q0.25': 0.011299380148285224, 'weights_q0.5': 0.015000278306457716, 'weights_q0.75': 0.0726480562178249, 'weights_max': 0.07705714634550241, 'weights_std': 0.02815345592530005

{'min_reward': -0.15280873134165546, 'max_reward': 0.11525351541062456, 'mean_reward': 0.0029561506007429236, 'portfolio_value': 3.5828545281004227, 'weights_q0.25': 0.010750135196511829, 'weights_q0.5': 0.014042217350002861, 'weights_q0.75': 0.07134926607564712, 'weights_max': 0.08028743139192714, 'weights_std': 0.029569432139095714}
{'min_reward': -0.15280873134165546, 'max_reward': 0.11525351541062456, 'mean_reward': 0.002936316406647273, 'portfolio_value': 3.5617518012631417, 'weights_q0.25': 0.011060826220410184, 'weights_q0.5': 0.03052599331904366, 'weights_q0.75': 0.07008984468294403, 'weights_max': 0.08031782374579355, 'weights_std': 0.026013690755051555}
{'min_reward': -0.15280873134165546, 'max_reward': 0.11525351541062456, 'mean_reward': 0.002962145567243989, 'portfolio_value': 3.5960447186342024, 'weights_q0.25': 0.015209474470183705, 'weights_q0.5': 0.017023940121229944, 'weights_q0.75': 0.040169582149386956, 'weights_max': 0.10463761389247567, 'weights_std': 0.03160180942

{'min_reward': -0.15280873134165546, 'max_reward': 0.20630300645779137, 'mean_reward': 0.0030802607407270336, 'portfolio_value': 3.905770619241878, 'weights_q0.25': 0.011178774136143644, 'weights_q0.5': 0.03685393586410961, 'weights_q0.75': 0.05924117573005374, 'weights_max': 0.06666713327424385, 'weights_std': 0.022300628011566473}
{'min_reward': -0.15280873134165546, 'max_reward': 0.20630300645779137, 'mean_reward': 0.003093721119209883, 'portfolio_value': 3.925932100277341, 'weights_q0.25': 0.011835405098105746, 'weights_q0.5': 0.030307246703188753, 'weights_q0.75': 0.056017856770950565, 'weights_max': 0.07697785847220856, 'weights_std': 0.02292169788052626}
{'min_reward': -0.15280873134165546, 'max_reward': 0.20630300645779137, 'mean_reward': 0.003088989315467429, 'portfolio_value': 3.9230164306436346, 'weights_q0.25': 0.0099776315031553, 'weights_q0.5': 0.03283253017852489, 'weights_q0.75': 0.0636566336882692, 'weights_max': 0.06939033028240277, 'weights_std': 0.024253309543699954

{'min_reward': -0.15280873134165546, 'max_reward': 0.20630300645779137, 'mean_reward': 0.0030449398587739465, 'portfolio_value': 4.071084591180766, 'weights_q0.25': 0.010786894702030326, 'weights_q0.5': 0.03650498690274865, 'weights_q0.75': 0.05678143948740507, 'weights_max': 0.06712712956947335, 'weights_std': 0.02168826450785928}
{'min_reward': -0.15280873134165546, 'max_reward': 0.20630300645779137, 'mean_reward': 0.0030484547583105215, 'portfolio_value': 4.078832466619477, 'weights_q0.25': 0.011436656049981318, 'weights_q0.5': 0.029059393225636686, 'weights_q0.75': 0.05305856990291773, 'weights_max': 0.07667915627578374, 'weights_std': 0.0229316616963458}
{'min_reward': -0.15280873134165546, 'max_reward': 0.20630300645779137, 'mean_reward': 0.0030509787990576766, 'portfolio_value': 4.085260611938228, 'weights_q0.25': 0.010828572390856637, 'weights_q0.5': 0.027832926556455702, 'weights_q0.75': 0.06227061862409282, 'weights_max': 0.0727072117085254, 'weights_std': 0.02505014167812812

{'min_reward': -0.15280873134165546, 'max_reward': 0.20630300645779137, 'mean_reward': 0.002972895073149459, 'portfolio_value': 4.141242836897199, 'weights_q0.25': 0.011588162334502347, 'weights_q0.5': 0.01951082008335018, 'weights_q0.75': 0.05431039556539682, 'weights_max': 0.08156634593678543, 'weights_std': 0.026065946109570932}
{'min_reward': -0.15280873134165546, 'max_reward': 0.20630300645779137, 'mean_reward': 0.002983019200895874, 'portfolio_value': 4.158328766048851, 'weights_q0.25': 0.012975730587353719, 'weights_q0.5': 0.03176272425552824, 'weights_q0.75': 0.05496690885543239, 'weights_max': 0.08140113654366596, 'weights_std': 0.023568616899419675}
{'min_reward': -0.15280873134165546, 'max_reward': 0.20630300645779137, 'mean_reward': 0.002980914505308709, 'portfolio_value': 4.158375734905651, 'weights_q0.25': 0.011763227399284499, 'weights_q0.5': 0.027698017929787648, 'weights_q0.75': 0.05731940331272345, 'weights_max': 0.08285718555303502, 'weights_std': 0.02461884522981522

{'min_reward': -0.15280873134165546, 'max_reward': 0.20630300645779137, 'mean_reward': 0.002873888069931144, 'portfolio_value': 4.198750470169406, 'weights_q0.25': 0.012325134023364461, 'weights_q0.5': 0.015518835215777767, 'weights_q0.75': 0.06291430042944149, 'weights_max': 0.10061165962498415, 'weights_std': 0.03326785090660562}
{'min_reward': -0.15280873134165546, 'max_reward': 0.20630300645779137, 'mean_reward': 0.002874273818825081, 'portfolio_value': 4.202188323122273, 'weights_q0.25': 0.015553363895036285, 'weights_q0.5': 0.01642027474057049, 'weights_q0.75': 0.034354150112052464, 'weights_max': 0.12464193306278372, 'weights_std': 0.03528887133274442}
{'min_reward': -0.15280873134165546, 'max_reward': 0.20630300645779137, 'mean_reward': 0.0028761503201460022, 'portfolio_value': 4.2078079183736055, 'weights_q0.25': 0.015241895543677842, 'weights_q0.5': 0.015724477766713686, 'weights_q0.75': 0.02966854972621718, 'weights_max': 0.1251851923621306, 'weights_std': 0.0366497558880182

{'min_reward': -0.15280873134165546, 'max_reward': 0.20630300645779137, 'mean_reward': 0.0027806594564589587, 'portfolio_value': 4.2182603954482465, 'weights_q0.25': 0.011950886707238772, 'weights_q0.5': 0.020765475415983926, 'weights_q0.75': 0.06227287491181196, 'weights_max': 0.08101298461671758, 'weights_std': 0.0270075646347273}
{'min_reward': -0.15280873134165546, 'max_reward': 0.20630300645779137, 'mean_reward': 0.002773486862860712, 'portfolio_value': 4.210153057822568, 'weights_q0.25': 0.012093554155611651, 'weights_q0.5': 0.013085875950152046, 'weights_q0.75': 0.07713977016267157, 'weights_max': 0.08933297111833483, 'weights_std': 0.031720273376345226}
{'min_reward': -0.15280873134165546, 'max_reward': 0.20630300645779137, 'mean_reward': 0.002764074043456422, 'portfolio_value': 4.198628472010312, 'weights_q0.25': 0.011267627468512637, 'weights_q0.5': 0.013800989921474164, 'weights_q0.75': 0.07752501604318099, 'weights_max': 0.0865200757806375, 'weights_std': 0.0318201858672156

{'min_reward': -0.15280873134165546, 'max_reward': 0.20630300645779137, 'mean_reward': 0.002684850318064945, 'portfolio_value': 4.255487754132945, 'weights_q0.25': 0.01228064550979591, 'weights_q0.5': 0.020692325560682626, 'weights_q0.75': 0.06406784088174543, 'weights_max': 0.08650472925291784, 'weights_std': 0.02818462349827635}
{'min_reward': -0.15280873134165546, 'max_reward': 0.20630300645779137, 'mean_reward': 0.0026725439320830174, 'portfolio_value': 4.238654676283673, 'weights_q0.25': 0.011241042277233266, 'weights_q0.5': 0.016118811206600324, 'weights_q0.75': 0.0699017838016866, 'weights_max': 0.0820755511541388, 'weights_std': 0.029004827747906022}
{'min_reward': -0.15280873134165546, 'max_reward': 0.20630300645779137, 'mean_reward': 0.0026677066870437823, 'portfolio_value': 4.23365051233849, 'weights_q0.25': 0.013680571134702387, 'weights_q0.5': 0.02845238815128802, 'weights_q0.75': 0.05764775143338621, 'weights_max': 0.07812505007605654, 'weights_std': 0.02362374499567054}


{'min_reward': -0.15280873134165546, 'max_reward': 0.20630300645779137, 'mean_reward': 0.0024663207209540414, 'portfolio_value': 4.044765982364633, 'weights_q0.25': 0.015071252466796806, 'weights_q0.5': 0.02558231402515066, 'weights_q0.75': 0.05598669146351233, 'weights_max': 0.08781130137599338, 'weights_std': 0.022760675558924748}
{'min_reward': -0.15280873134165546, 'max_reward': 0.20630300645779137, 'mean_reward': 0.0024608513139771893, 'portfolio_value': 4.038257006236573, 'weights_q0.25': 0.01674211766672966, 'weights_q0.5': 0.03220314281807489, 'weights_q0.75': 0.04921880959908648, 'weights_max': 0.07548683748389859, 'weights_std': 0.020749989026735922}
{'min_reward': -0.15280873134165546, 'max_reward': 0.20630300645779137, 'mean_reward': 0.0024510387285807357, 'portfolio_value': 4.024605592329574, 'weights_q0.25': 0.013319689828614787, 'weights_q0.5': 0.028028167610569733, 'weights_q0.75': 0.053254339686394904, 'weights_max': 0.07740649479489846, 'weights_std': 0.02267531378162

{'min_reward': -0.15280873134165546, 'max_reward': 0.20630300645779137, 'mean_reward': 0.0022917026599796513, 'portfolio_value': 3.916519845905231, 'weights_q0.25': 0.013718326677922301, 'weights_q0.5': 0.020437821368521528, 'weights_q0.75': 0.06583406287581853, 'weights_max': 0.09296189809327525, 'weights_std': 0.028248656716715663}
{'min_reward': -0.15280873134165546, 'max_reward': 0.20630300645779137, 'mean_reward': 0.002275769921243149, 'portfolio_value': 3.891566565325792, 'weights_q0.25': 0.013638092217791647, 'weights_q0.5': 0.02178937026407047, 'weights_q0.75': 0.04781804207890979, 'weights_max': 0.09381659107097808, 'weights_std': 0.026758365302367706}
{'min_reward': -0.15280873134165546, 'max_reward': 0.20630300645779137, 'mean_reward': 0.0022712914580730634, 'portfolio_value': 3.8861796847630186, 'weights_q0.25': 0.009911056061248511, 'weights_q0.5': 0.028048781344622638, 'weights_q0.75': 0.06339240827914856, 'weights_max': 0.07269055317891662, 'weights_std': 0.0253483716754

(1760, 1760)

In [95]:
portfolio_state = pd.DataFrame(index=train_env.venv.venv.envs[0].close_prices_pct.index, data=portfolio_values)
portfolio_state.to_csv('portfolios/' + model_name + '_portfolio.csv')

In [96]:
weights_df = pd.DataFrame(index=train_env.venv.venv.envs[0].close_prices_pct.columns, data=weights_states[-1])
weights_df.to_csv('portfolios/' + model_name + '_weights.csv')

## Tests

In [294]:
with open('data/sp500_data_5y.pickle', 'rb') as f:
    data = pickle.load(f)
    close_prices = pd.DataFrame({k: v['Close'] for k, v in data.items()})

FileNotFoundError: [Errno 2] No such file or directory: 'data/sp500_data_5y.pickle'

In [None]:
data['AAPL'].diff()

In [None]:
rand_w = softmax_normalization(np.random.randn(489))
rand_w

In [None]:
ones_w = softmax_normalization(np.ones(489))
ones_w

In [None]:
close_prices.diff()

In [None]:
(close_prices.pct_change().iloc[48] @ rand_w)

In [None]:
close_prices.pct_change().dropna().mean() @ rand_w

In [None]:
(close_prices.pct_change().dropna() @ rand_w).sum()

In [None]:
(close_prices.pct_change().dropna() @ ones_w).sum()