In [1]:
## install required packages
# NOTE(review): none of these installs pin a version, so a fresh run may pull
# different releases than the ones that produced the outputs saved below.
!pip install swig
!pip install wrds
!pip install pyportfolioopt
## install finrl library
!pip install -q condacolab
import condacolab
# NOTE(review): condacolab.install() presumably restarts the Colab kernel the
# first time it runs, so this cell may need to be executed twice - confirm.
condacolab.install()
# System build dependencies (swig is installed here again via apt, in addition to the pip install above).
!apt-get update -y -qq && apt-get install -y -qq cmake libopenmpi-dev python3-dev zlib1g-dev libgl1-mesa-glx swig
# FinRL is installed from the repository's default branch (no tag or commit is specified).
!pip install git+https://github.com/AI4Finance-Foundation/FinRL.git

[0m✨🍰✨ Everything looks OK!
Collecting git+https://github.com/AI4Finance-Foundation/FinRL.git
  Cloning https://github.com/AI4Finance-Foundation/FinRL.git to /tmp/pip-req-build-ou6j21te
  Running command git clone --filter=blob:none --quiet https://github.com/AI4Finance-Foundation/FinRL.git /tmp/pip-req-build-ou6j21te
  Resolved https://github.com/AI4Finance-Foundation/FinRL.git to commit 1410c340ded1a2fb49b16caf972e55b3d1d6efbb
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting elegantrl@ git+https://github.com/AI4Finance-Foundation/ElegantRL.git#egg=elegantrl
  Cloning https://github.com/AI4Finance-Foundation/ElegantRL.git to /tmp/pip-install-8dr1mnug/elegantrl_26147765953f481188d519ee091d6248
  Running command git clone --filter=blob:none --quiet https://github.com/AI4Finance-Foundation/ElegantRL.git /tmp/pip-install-8dr1mnug/elegantrl_26147765953f48118

In [2]:
import datetime
import itertools
import os
import sys

import gymnasium as gym
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pprint import pprint
from stable_baselines3 import A2C, DDPG, PPO, SAC, TD3
from stable_baselines3.common.logger import configure
from stable_baselines3.common.vec_env import DummyVecEnv

sys.path.append("../FinRL")

from finrl import config, config_tickers
from finrl.agents.stablebaselines3.models import DRLAgent, TensorboardCallback
from finrl.config import (
    DATA_SAVE_DIR, TRAINED_MODEL_DIR, TENSORBOARD_LOG_DIR,
    RESULTS_DIR, INDICATORS, TRAIN_START_DATE, TRAIN_END_DATE,
    TEST_START_DATE, TEST_END_DATE, TRADE_START_DATE, TRADE_END_DATE
)
from finrl.main import check_and_make_directories
from finrl.meta.data_processor import DataProcessor
from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv
from finrl.meta.preprocessor.preprocessors import FeatureEngineer, data_split
from finrl.meta.preprocessor.yahoodownloader import YahooDownloader
from finrl.plot import backtest_stats, backtest_plot, get_daily_return, get_baseline
from gymnasium.utils import seeding

# Render matplotlib figures inside the notebook output.
%matplotlib inline

# Make sure the FinRL working directories (data, trained models, tensorboard logs, results) are available.
check_and_make_directories([DATA_SAVE_DIR, TRAINED_MODEL_DIR, TENSORBOARD_LOG_DIR, RESULTS_DIR])



In [3]:
class StockEnvMine(gym.Env):
    """Gymnasium environment simulating daily trading of several stocks.

    The state is a flat Python list laid out as::

        [cash]
        + [close price of every stock]
        + [shares held of every stock]
        + [values of every technical indicator, one run per indicator]

    An action is one value in ``[-1, 1]`` per stock; it is multiplied by
    ``hmax`` and truncated to an integer share count (negative sells,
    positive buys).  The reward of a step is the change in total account
    value multiplied by ``reward_scaling``.

    Parameters
    ----------
    df : DataFrame indexed by day number (``df.loc[day, :]`` is used to fetch
        a day's rows) with at least ``date``, ``tic``, ``close`` and the
        indicator columns.  Assumes one row per stock for every day label -
        TODO confirm against ``data_split``.
    hmax : multiplier turning a normalised action into a share count.
    initial_amount : starting cash.
    num_stock_shares : list of shares held per stock at the start.
    buy_cost_pct, sell_cost_pct : per-stock proportional transaction costs.
    state_space : length of the observation vector.
    stock_dim : number of stocks.
    tech_indicator_list : indicator column names appended to the state.
    reward_scaling : factor applied to the raw reward.
    action_space : length of the action vector.
    initial : when False the cash and holdings are taken from ``last_state``.
    last_state : state list of a previous run (only read when ``initial`` is
        False).
    turbulence_th : when set, all holdings are liquidated on days whose risk
        indicator is at or above this threshold.
    plots : save an account-value plot when an episode ends.
    risk_indicator : column read as the turbulence value.
    mode, model_name, iteration : when ``mode`` and ``model_name`` are both
        non-empty, episode results are written to ``results/`` using these
        three values in the file names.

    NOTE(review): ``observation_space`` declares ``low=0`` although the state
    carries indicator values that may be negative.  It is left unchanged here
    because changing the bounds could make previously saved models fail
    space-compatibility checks - confirm before altering.
    """

    metadata = {"render.modes": ["human"]}

    def __init__(
            self,
            df,
            hmax,
            initial_amount,
            num_stock_shares,
            buy_cost_pct,
            sell_cost_pct,
            state_space,
            stock_dim,
            tech_indicator_list,
            reward_scaling,
            action_space,
            initial=True,
            last_state=None,
            turbulence_th=None,
            plots=False,
            risk_indicator = 'turbulence',
            mode="",
            model_name="",
            iteration="",
    ):
        self.df = df
        self.day = 0
        self.data = self.df.loc[self.day, :]
        self.hmax = hmax
        self.initial_amount = initial_amount
        self.num_stock_shares = num_stock_shares
        self.buy_cost_pct = buy_cost_pct
        self.sell_cost_pct = sell_cost_pct
        self.state_space = state_space
        self.observation_space = gym.spaces.Box(low=0, high=np.inf, shape=(state_space,))
        self.action_space = gym.spaces.Box(low=-1, high=1, shape=(action_space,))
        self.stock_dim = stock_dim
        self.tech_indicator_list = tech_indicator_list
        self.reward_scaling = reward_scaling
        self.turbulence_th = turbulence_th
        self.plots = plots
        self.initial = initial
        # Must be stored before initilize_state() runs: the non-initial branch reads it.
        self.last_state = [] if last_state is None else last_state
        self.terminal = False
        self.risk_indicator = risk_indicator
        self.state = self.initilize_state()
        self.log_every = 1
        self.mode = mode
        self.model_name = model_name
        self.iteration = iteration

        self.reward = 0
        self.turbulence= 0
        self.cost = 0
        self.trades = 0
        self.episode = 0
        self.asset_memory = [self._opening_asset_value()]
        self.reward_memory = []
        self.actions_memory = []
        # NOTE: state_memory is appended to on every step and is not cleared by reset().
        self.state_memory = ([])
        self.date_memory = [self.getDate()]
        self.seed()

    def seed(self, seed=None):
        """(Re)create the environment's random generator and return ``[seed]``."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _indicator_values(self):
        """Return the current day's indicator values flattened, one run per indicator."""
        return sum((self.data[tech].values.tolist() for tech in self.tech_indicator_list), [])

    def initilize_state(self):
        """Build the state for day 0, either fresh or carried over from ``last_state``."""
        if self.initial:
            state = ([self.initial_amount] + self.data.close.values.tolist() + self.num_stock_shares + self._indicator_values())
        else:
            if len(self.last_state) < 2 * self.stock_dim + 1:
                raise ValueError("initial=False requires last_state to hold the cash and holdings of a previous run")
            # Cash and holdings come from the previous run; prices and indicators from today's data.
            state = ([self.last_state[0]] + self.data.close.values.tolist() + self.last_state[(self.stock_dim + 1) : (self.stock_dim * 2 + 1)] + self._indicator_values())
        return state

    def _opening_asset_value(self):
        """Cash plus market value of the holdings recorded in the freshly built state.

        For ``initial=True`` this equals ``initial_amount`` plus the value of
        ``num_stock_shares``; for a carried-over state it reflects the cash and
        holdings taken from ``last_state``.
        """
        prices = np.array(self.state[1:self.stock_dim + 1])
        holdings = np.array(self.state[(self.stock_dim + 1):(2 * self.stock_dim + 1)])
        return self.state[0] + np.sum(holdings * prices)

    def _total_asset(self):
        """Cash plus market value of all holdings in the current state."""
        prices = np.array(self.state[1 : (self.stock_dim + 1)])
        holdings = np.array(self.state[(self.stock_dim + 1) : (2 * self.stock_dim + 1)])
        return self.state[0] + sum(prices * holdings)

    def getDate(self):
        """Return the date of the rows currently loaded in ``self.data``."""
        return self.data.date.unique()[0]

    def render(self, mode="human", close=False):
        """Return the current state list (nothing is drawn)."""
        return self.state

    def reset(self, *, seed=None, options=None,):
        """Start a new episode at day 0 and return ``(state, info)``.

        ``seed`` re-seeds the random generator when given; ``options`` is
        accepted for API compatibility and ignored.
        """
        if seed is not None:
            self.seed(seed)
        self.day = 0
        self.data = self.df.loc[self.day, :]
        self.state = self.initilize_state()
        self.asset_memory = [self._opening_asset_value()]
        self.turbulence = 0
        self.cost = 0
        self.trades = 0
        self.terminal = False
        self.reward_memory = []
        self.actions_memory = []
        self.date_memory = [self.getDate()]
        self.episode += 1
        return self.state, {}

    def update(self):
        """Return the next state: same cash and holdings, prices/indicators from ``self.data``."""
        state = ([self.state[0]] + self.data.close.values.tolist() + list(self.state[(self.stock_dim + 1) : (2 * self.stock_dim + 1)]) + self._indicator_values())
        return state

    def buy(self, index, action):
        """Buy up to ``action`` shares of stock ``index``; return the number bought.

        The purchase is limited by available cash (including the buy fee).
        Nothing is bought when the price is not positive.
        """
        price = self.state[index + 1]
        # NOTE(review): this slot is the first indicator's value for the stock; the
        # `!= True` test looks like a tradability flag inherited from upstream - confirm.
        if self.state[index + 2 * self.stock_dim + 1] != True and price > 0:
            unit_cost = price * (1 + self.buy_cost_pct[index])
            nums_can_buy = self.state[0] // unit_cost
            nums = min(nums_can_buy, action)
            amount = unit_cost * nums
            self.state[0] -= amount
            self.state[index + self.stock_dim + 1] += nums
            # Track the fee only, consistent with how both sell paths account for cost.
            self.cost += price * self.buy_cost_pct[index] * nums
            self.trades += 1
        else:
            nums = 0
        return nums

    def Action_Buy(self, index, action):
        """Buy via :meth:`buy` unless turbulence is at or above the threshold."""
        if self.turbulence_th is None:
            nums = self.buy(index, action)
        else:
            if self.turbulence < self.turbulence_th:
                nums = self.buy(index, action)
            else:
                nums = 0
        return nums

    def sell(self, index, action):
        """Sell up to ``abs(action)`` shares of stock ``index``; return the number sold."""
        # NOTE(review): same first-indicator `!= True` check as in buy() - confirm intent.
        if self.state[index + 2 * self.stock_dim + 1] != True:
            if self.state[index + self.stock_dim + 1] > 0:
                nums_can_sell = self.state[index + self.stock_dim + 1]
                nums = min(nums_can_sell, abs(action))
                amount = self.state[index + 1] * (1 - self.sell_cost_pct[index]) * nums
                self.state[0] += amount
                self.state[index + self.stock_dim + 1] -= nums
                self.cost += self.state[index + 1] * self.sell_cost_pct[index] * nums
                self.trades += 1
            else:
                nums = 0
        else:
            nums = 0
        return nums

    def Action_Sell(self, index, action):
        """Sell via :meth:`sell`; under high turbulence liquidate the whole position instead."""
        if self.turbulence_th is None:
            nums = self.sell(index, action)
        else:
            if self.turbulence < self.turbulence_th:
                nums = self.sell(index, action)
            else:
                # Turbulence at/above threshold: dump every share held, provided the price is positive.
                if self.state[index + 1] > 0:
                    if self.state[index + self.stock_dim + 1] > 0:
                        nums = self.state[index + self.stock_dim + 1]
                        amount = self.state[index + 1] * (1 - self.sell_cost_pct[index]) * nums
                        self.state[0] += amount
                        self.state[index + self.stock_dim + 1] = 0
                        self.cost += self.state[index + 1] * self.sell_cost_pct[index] * nums
                        self.trades += 1
                    else:
                        nums = 0
                else:
                    nums = 0
        return nums

    def makePlot(self):
        """Save the account-value curve of the current episode as a PNG under ``results/``."""
        plt.plot(self.asset_memory, "r")
        plt.savefig(f"results/account_value_trade_{self.episode}.png")
        plt.close()

    def getDummyEnv(self):
        """Wrap this environment in a ``DummyVecEnv``, reset it, and return ``(vec_env, obs)``."""
        e = DummyVecEnv([lambda: self])
        obs = e.reset()
        return e, obs

    def saveAssetMemory(self):
        """Return a DataFrame with columns ``date`` and ``account_value`` for the episode so far."""
        date_list = self.date_memory
        asset_list = self.asset_memory
        df_account_value = pd.DataFrame({"date": date_list, "account_value": asset_list})
        return df_account_value

    def saveActionMemory(self):
        """Return the executed share counts per day, indexed by date with one column per ticker."""
        # The last recorded date has no action yet, hence [:-1].
        date_list = self.date_memory[:-1]
        df_date = pd.DataFrame(date_list)
        df_date.columns = ["date"]
        action_list = self.actions_memory
        df_actions = pd.DataFrame(action_list)
        df_actions.columns = self.data.tic.values
        df_actions.index = df_date.date
        return df_actions

    def step(self, actions):
        """Apply ``actions`` for the current day and advance one day.

        Returns ``(state, reward, terminated, truncated, info)``.  The episode
        terminates when the last day has been reached or cash is not positive;
        on that call no trade is executed, a summary is printed and, when
        ``mode`` and ``model_name`` are set, result files are written.
        """
        self.terminal = (self.day >= len(self.df.index.unique()) - 1 or self.state[0] <= 0)
        if self.terminal:
            if self.plots:
                self.makePlot()
            end_asset = self._total_asset()
            total_reward = end_asset - self.initial_amount
            df_total_value = pd.DataFrame(self.asset_memory)
            df_total_value.columns = ["account_value"]
            df_total_value["date"] = self.date_memory
            df_total_value["daily_return"] = df_total_value["account_value"].pct_change(1)
            return_std = df_total_value["daily_return"].std()
            if return_std != 0:
                sharpe = (252 ** 0.5) * df_total_value["daily_return"].mean() / return_std
            # Built from a dict so an episode that ends before any step still yields a valid frame.
            df_rewards = pd.DataFrame({"account_rewards": self.reward_memory})
            df_rewards["date"] = self.date_memory[:-1]
            if self.episode % self.log_every == 0:
                print(f"day: {self.day}, episode: {self.episode}")
                print(f"begin_total_asset: {self.asset_memory[0]:0.2f}")
                print(f"end_total_asset: {end_asset:0.2f}")
                print(f"total_reward: {total_reward:0.2f}")
                print(f"total_cost: {self.cost:0.2f}")
                print(f"total_trades: {self.trades}")
                if return_std != 0:
                    print(f"Sharpe: {sharpe:0.3f}")
                print("=================================")

            if (self.model_name != "") and (self.mode != ""):
                df_total_value.to_csv("results/account_value_{}_{}_{}.csv".format(self.mode, self.model_name, self.iteration),index=False,)
                df_rewards.to_csv("results/account_rewards_{}_{}_{}.csv".format(self.mode, self.model_name, self.iteration), index=False,)
                plt.plot(self.asset_memory, "r")
                plt.savefig("results/account_value_{}_{}_{}.png".format(self.mode, self.model_name, self.iteration))
                plt.close()
            return self.state, self.reward, self.terminal, False, {}
        else:
            # Scale normalised actions to integer share counts.
            actions = (actions * self.hmax).astype(int)
            if self.turbulence_th is not None:
                if self.turbulence >= self.turbulence_th:
                    # High turbulence: request a maximal sell for every stock.
                    actions = np.array([-self.hmax] * self.stock_dim)
            begin_asset = self._total_asset()
            sort_action = np.argsort(actions)
            # Largest sells first, then largest buys, so sales free up cash before purchases.
            sell_index = sort_action[:np.where(actions < 0)[0].shape[0]]
            buy_index = sort_action[::-1][:np.where(actions > 0)[0].shape[0]]
            for index in sell_index:
                actions[index] = self.Action_Sell(index, actions[index]) * (-1)
            for index in buy_index:
                actions[index] = self.Action_Buy(index, actions[index])
            self.actions_memory.append(actions)

            self.day += 1
            self.data = self.df.loc[self.day, :]
            if self.turbulence_th is not None:
                self.turbulence = self.data[self.risk_indicator].values[0]
            self.state = self.update()
            end_total_asset = self._total_asset()
            self.asset_memory.append(end_total_asset)
            self.date_memory.append(self.getDate())
            # Reward Function: change in account value; the unscaled value is what gets logged.
            self.reward = end_total_asset - begin_asset
            self.reward_memory.append(self.reward)
            self.reward = self.reward * self.reward_scaling
            self.state_memory.append(self.state)
            return self.state, self.reward, self.terminal, False, {}


In [4]:
class Agent:
    """Thin wrapper around a Stable-Baselines3 A2C, DDPG or PPO model.

    Parameters
    ----------
    env : environment (or vectorised environment) handed to the model.
    model_name : one of ``"a2c"``, ``"ddpg"``, ``"ppo"`` or the same names
        with an ``"_ensemble"`` suffix.
    iter_num : tag appended to tensorboard log names and saved-model file names.
    policy, policy_kwargs, verbose, seed, tensorboard_log : forwarded to the
        model constructor.
    model_kwargs : hyperparameters for the model; when None the matching
        ``<ALGO>_PARAMS`` dict from ``finrl.config`` is used.
    """

    def __init__(self, env, model_name, iter_num=0, policy="MlpPolicy", policy_kwargs=None, model_kwargs=None, verbose=1, seed=None, tensorboard_log=None):
        self.models = {"a2c": A2C, "ddpg": DDPG, "ppo": PPO, "a2c_ensemble": A2C, "ddpg_ensemble": DDPG, "ppo_ensemble": PPO}
        # Default hyperparameters per algorithm; the "_ensemble" names share the base algorithm's defaults.
        model_kwargs_dict = {x: config.__dict__[f"{x.upper()}_PARAMS"] for x in ["a2c", "ddpg", "ppo"]}
        model_kwargs_dict_ensemble = {x + "_ensemble": config.__dict__[f"{x.upper()}_PARAMS"] for x in ["a2c", "ddpg", "ppo"]}
        model_kwargs_dict.update(model_kwargs_dict_ensemble)
        self.model_name = model_name
        self.iter_num = iter_num
        if model_kwargs is None:
            self.model_kwargs = model_kwargs_dict[model_name]
        else:
            self.model_kwargs = model_kwargs
        self.model = self.models[model_name](policy=policy, env=env, verbose=verbose, seed=seed, tensorboard_log=tensorboard_log, policy_kwargs=policy_kwargs, **self.model_kwargs)

    def train(self, total_timesteps=5000):
        """Train the model, save it under ``config.TRAINED_MODEL_DIR`` and return it."""
        model = self.model.learn(total_timesteps=total_timesteps, tb_log_name="{}_{}".format(self.model_name, self.iter_num), callback=TensorboardCallback())
        model.save(f"{config.TRAINED_MODEL_DIR}/{self.model_name.upper()}_{total_timesteps // 1000}k_{self.iter_num}")
        return model

    def predict(self, env_new, deterministic=True):
        """Run the model over ``env_new`` once and return ``(account_values, actions)``.

        ``env_new`` must provide ``getDummyEnv``, ``saveAssetMemory``,
        ``saveActionMemory`` and a ``df`` attribute.
        """
        env, obs = env_new.getDummyEnv()
        account_memory = None
        actions_memory = None

        env.reset()
        max_step = len(env_new.df.index.unique()) - 1

        for i in range(max_step + 1):
            action, states = self.model.predict(obs, deterministic=deterministic)
            obs, rewards, dones, info = env.step(action)
            # Snapshot on the last non-terminal step: SB3's DummyVecEnv resets a
            # finished environment automatically, which would wipe these memories.
            if i == max_step - 1:
                account_memory = env.env_method(method_name="saveAssetMemory")
                actions_memory = env.env_method(method_name="saveActionMemory")
            if dones[0]:
                print("Finished")
                break
        return account_memory[0], actions_memory[0]

    def predictLoadFromFile(self, env_new, cwd, deterministic=True):
        """Load a saved model from ``cwd`` and roll it out on ``env_new`` until done.

        Works with environments following either the gymnasium API
        (``reset() -> (obs, info)``, ``step() -> 5-tuple``) or the legacy gym
        API (``reset() -> obs``, ``step() -> 4-tuple``).

        Returns
        -------
        (episode_total_assets, episode_returns) : lists built from the first
            state entry after every step; the returns are that value divided
            by ``env_new.initial_amount``.

        Raises
        ------
        ValueError : when the model cannot be loaded.
        """
        try:
            model = self.model.load(cwd)
            print("Model loaded from file")
        except Exception as error:
            # Exception (not BaseException) so Ctrl-C / SystemExit are not masked as load errors.
            raise ValueError(f"Failed to load agent. Error: {str(error)}") from error

        reset_result = env_new.reset()
        # gymnasium returns (obs, info); legacy gym returns the observation alone.
        state = reset_result[0] if isinstance(reset_result, tuple) else reset_result
        episode_returns = []
        episode_total_assets = [env_new.initial_amount]
        done = False
        while not done:
            action = model.predict(state, deterministic=deterministic)[0]
            step_result = env_new.step(action)
            if len(step_result) == 5:
                state, reward, terminated, truncated, _ = step_result
                done = bool(terminated) or bool(truncated)
            else:
                state, reward, done, _ = step_result
            # NOTE(review): state[0] is the first state entry (cash in StockEnvMine),
            # not cash plus holdings - confirm this is the intended "total asset".
            episode_total_assets.append(state[0])
            episode_return = state[0] / env_new.initial_amount
            episode_returns.append(episode_return)
        print(f"Finish Trading. The final amount of money is: {episode_total_assets[-1]}. The total return is: {episode_returns[-1]}")
        return episode_total_assets, episode_returns

In [5]:
class EnsembleAgent:
    """Rolling-window ensemble of A2C, DDPG and PPO agents.

    For every window the three agents are retrained on all data up to the
    window start, validated on the following ``validation_window`` trade
    dates, and the agent with the best validation Sharpe ratio trades the
    next ``rebalance_window`` trade dates.

    Parameters
    ----------
    df : DataFrame with at least ``date`` and ``turbulence`` columns plus
        whatever ``data_split`` and ``StockEnvMine`` need.
    train_period : ``[start, end]`` dates of the in-sample period.
    val_period : ``[start, end]`` dates; trade dates are those with
        ``start < date <= end``.
    rebalance_window : number of trade dates between retrainings.
    validation_window : number of trade dates used for validation.
    env_args : keyword arguments forwarded to every ``StockEnvMine``.
    """

    def __init__(self, df, train_period, val_period, rebalance_window, validation_window, env_args):
        self.df = df
        self.train_period = train_period
        self.val_period = val_period
        self.rebalance_window = rebalance_window
        self.validation_window = validation_window
        self.env_args = env_args
        self.unique_trade_date = df[(df.date > val_period[0]) & (df.date <= val_period[1])].date.unique()
        self.train_env = None

    def val(self, model, val_data, val_env, val_obs):
        """Step ``model`` through ``val_env`` once per unique index label in ``val_data``."""
        for _ in range(len(val_data.index.unique())):
            action, _states = model.predict(val_obs)
            val_obs, rewards, dones, info = val_env.step(action)

    def predict(self, model, name, last_state, iter, tur_th, initial):
        """Trade the window ending at trade-date index ``iter`` and return the final state.

        The state reached on the last trading day is also written to
        ``results/last_state_<name>_<iter>.csv``.
        """
        trade_data = data_split(self.df, start=self.unique_trade_date[iter - self.rebalance_window], end=self.unique_trade_date[iter],)
        trade_env = DummyVecEnv([lambda: StockEnvMine(df=trade_data, turbulence_th=tur_th, iteration=iter, mode="trade", model_name=name, last_state=last_state, initial=initial, **self.env_args)])
        trade_obs = trade_env.reset()
        n_days = len(trade_data.index.unique())
        for i in range(n_days):
            action, _ = model.predict(trade_obs)
            trade_obs, _, _, _ = trade_env.step(action)
            # Capture the state after the last real step, before the terminal call.
            if i == (n_days - 2):
                last_state = trade_env.envs[0].render()

        df_last_state = pd.DataFrame({"last_state": last_state})
        # Named after the window (`iter`), not the inner day counter, so each
        # window keeps its own file instead of overwriting the previous one.
        df_last_state.to_csv(f"results/last_state_{name}_{iter}.csv", index=False)
        return last_state

    def getSharpe(self, iter, model_name):
        """Read the validation account-value CSV for this window/model and return its Sharpe ratio.

        Returns ``np.inf`` for a constant positive return series and 0 for a
        constant non-positive one.
        """
        df_total_value = pd.read_csv(f"results/account_value_validation_{model_name}_{iter}.csv")
        daily_return = df_total_value["daily_return"]
        if daily_return.var() == 0:
            if daily_return.mean() > 0:
                return np.inf
            else:
                return 0
        else:
            # NOTE(review): scaled by sqrt(4) here while the environment prints a
            # sqrt(252)-scaled Sharpe - confirm the intended annualisation.
            return daily_return.mean() / daily_return.std() * np.sqrt(4)

    @staticmethod
    def _recent_turbulence_mean(df, as_of_date, lookback_rows=63):
        """Mean turbulence over the distinct dates in the last ``lookback_rows`` rows up to ``as_of_date``.

        Rows are located by position, so the result does not depend on the
        frame's index labels (which are not contiguous after filtering).
        Raises ``IndexError`` when ``as_of_date`` does not occur in ``df``.
        """
        positions = np.flatnonzero((df["date"] == as_of_date).to_numpy())
        end_pos = int(positions[-1])
        start_pos = max(end_pos - lookback_rows + 1, 0)
        window = df.iloc[start_pos:(end_pos + 1), :].drop_duplicates(subset=["date"])
        return np.mean(window.turbulence.values)

    def _train_and_validate(self, algo, model_kwargs, total_timesteps, iteration, validation, tur_threshold, val_start, val_end):
        """Train one algorithm on ``self.train_env``, validate it, and return ``(model, sharpe)``."""
        label = algo.upper()
        model_name = f"{algo}_ensemble"
        print(f"{label} Training: ")
        agent = Agent(env=self.train_env, iter_num=iteration, model_name=model_name, model_kwargs=model_kwargs)
        agent.model = agent.train(total_timesteps=total_timesteps)
        print("{} Validation from {} to {}".format(label, val_start, val_end))
        val_env = DummyVecEnv([lambda: StockEnvMine(df=validation, turbulence_th=tur_threshold, iteration=iteration, mode="validation", model_name=model_name, **self.env_args)])
        val_obs = val_env.reset()
        self.val(agent.model, validation, val_env, val_obs)
        return agent.model, self.getSharpe(iteration, model_name=model_name)

    def train(self, A2C_kwargs=None, PPO_kwargs=None, DDPG_kwargs=None, timesteps=None):
        """Run the rolling train / validate / trade loop and return a summary DataFrame.

        Parameters
        ----------
        A2C_kwargs, PPO_kwargs, DDPG_kwargs : hyperparameter dicts; None uses
            the defaults chosen by ``Agent``.
        timesteps : dict with keys ``"a2c"``, ``"ppo"``, ``"ddpg"`` giving the
            training steps per algorithm; defaults to 50000 each.

        Returns
        -------
        DataFrame with one row per window: iteration index, validation start
        and end dates, the selected model and the three Sharpe ratios.
        """
        if timesteps is None:
            timesteps = {"a2c": 50000, "ppo": 50000, "ddpg": 50000}
        tell = True
        last_state = []

        # Evaluation order matters for the printed log: A2C, then DDPG, then PPO.
        algo_kwargs = {"a2c": A2C_kwargs, "ddpg": DDPG_kwargs, "ppo": PPO_kwargs}
        sharpe_history = {algo: [] for algo in algo_kwargs}
        model_order = []
        val_start_date = []
        val_end_date = []
        iteration_list = []

        n_trade_dates = len(self.unique_trade_date)
        window_span = self.rebalance_window + self.validation_window

        insample_turbulence = self.df[(self.df.date >= self.train_period[0]) & (self.df.date < self.train_period[1])]
        insample_tur_threshold = np.quantile(insample_turbulence.turbulence.values, .90)
        for i in range(window_span, n_trade_dates + window_span, self.rebalance_window):
            val_start = self.unique_trade_date[i - window_span]
            # `i` is used as the end index of the trading window, so it must be a valid position.
            if i >= n_trade_dates:
              tell = False
            # Clamp the validation end to the last trade date once it would run past the data.
            if i - self.rebalance_window >= n_trade_dates:
              end_index = -1
            else:
              end_index = i - self.rebalance_window
            val_end = self.unique_trade_date[end_index]
            val_start_date.append(val_start)
            val_end_date.append(val_end)
            iteration_list.append(i)
            initial = (i - window_span == 0)

            # Use the tighter 90% threshold when recent turbulence is already high, else the 99% quantile.
            history_tur_mean = self._recent_turbulence_mean(self.df, val_start)
            if history_tur_mean > insample_tur_threshold:
                tur_threshold = insample_tur_threshold
            else:
                tur_threshold = np.quantile(insample_turbulence.turbulence.values, 0.99)
            print("Turbulence threshold: ", tur_threshold)

            train_data = data_split(self.df, start=self.train_period[0], end=val_start,)
            validation = data_split(self.df, start=val_start, end=val_end,)
            self.train_env = DummyVecEnv([lambda: StockEnvMine(df=train_data, **self.env_args)])
            print("Model training from: {} to {}".format(self.train_period[0], val_start))

            models = {}
            sharpes = {}
            for algo, kwargs in algo_kwargs.items():
                models[algo], sharpes[algo] = self._train_and_validate(algo, kwargs, timesteps[algo], i, validation, tur_threshold, val_start, val_end)
                sharpe_history[algo].append(sharpes[algo])

            print("Ensemble Model Training: ")
            # A2C must win strictly; PPO wins ties; DDPG is the fallback.
            if sharpes["a2c"] > sharpes["ppo"] and sharpes["a2c"] > sharpes["ddpg"]:
                best = "a2c"
            elif sharpes["ppo"] >= sharpes["a2c"] and sharpes["ppo"] >= sharpes["ddpg"]:
                best = "ppo"
            else:
                best = "ddpg"
            model_order.append(best)
            model_ensemble = models[best]

            if tell:
                last_state = self.predict(model=model_ensemble, name="ensemble", last_state=last_state, iter=i, tur_th=tur_threshold, initial=initial)

        df_summary = pd.DataFrame({"iteration": iteration_list, "Start Date": val_start_date, "End Date": val_end_date, "model_order": model_order, "a2c_sharpe": sharpe_history["a2c"], "ddpg_sharpe": sharpe_history["ddpg"], "ppo_sharpe": sharpe_history["ppo"]})
        return df_summary


In [6]:
# These assignments replace the same-named values imported from finrl.config in the imports cell.
TRAIN_START_DATE = '2010-01-01'
TRAIN_END_DATE = '2023-01-01'
# The trade period starts on the same date the training period ends.
TRADE_START_DATE = '2023-01-01'
TRADE_END_DATE = '2023-09-01'

In [7]:
# Download price data for the Dow 30 ticker list covering the training and trade periods together.
df = YahooDownloader(start_date = TRAIN_START_DATE,
                     end_date = TRADE_END_DATE,
                     ticker_list = config_tickers.DOW_30_TICKER).fetch_data()

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%*******

In [8]:
# Display only: the sorted copy is not assigned, so `df` itself is left unchanged.
df.sort_values(['date','tic'],ignore_index=True)

Unnamed: 0,date,open,high,low,close,volume,tic,day
0,2010-01-04,7.622500,7.660714,7.585000,6.478998,493729600,AAPL,0
1,2010-01-04,56.630001,57.869999,56.560001,41.817791,5277400,AMGN,0
2,2010-01-04,40.810001,41.099998,40.389999,33.300179,6894300,AXP,0
3,2010-01-04,55.720001,56.389999,54.799999,43.777546,6186700,BA,0
4,2010-01-04,57.650002,59.189999,57.509998,40.523598,7325600,CAT,0
...,...,...,...,...,...,...,...,...
100848,2023-08-31,492.359985,493.820007,476.290009,473.117889,4927700,UNH,3
100849,2023-08-31,245.589996,248.020004,245.449997,245.158066,5532600,V,3
100850,2023-08-31,34.849998,35.139999,34.759998,34.248959,24333200,VZ,3
100851,2023-08-31,25.590000,25.760000,25.180000,24.724798,10794500,WBA,3


In [9]:
# Configure the feature pipeline: technical indicators from INDICATORS, plus VIX and the turbulence index.
fe = FeatureEngineer(
                    use_technical_indicator=True,
                    tech_indicator_list = INDICATORS,
                    use_vix=True,
                    use_turbulence=True,
                    user_defined_feature = False)

# `processed` is the raw download with the feature columns added.
processed = fe.preprocess_data(df)

Successfully added technical indicators
[*********************100%%**********************]  1 of 1 completed
Shape of DataFrame:  (3438, 8)
Successfully added vix
Successfully added turbulence index


In [10]:
# Build the full (date x ticker) grid so every ticker has a row on every
# trading date; combinations missing from `processed` are filled with 0.
list_ticker = processed["tic"].unique().tolist()
list_date = list(pd.date_range(processed['date'].min(),processed['date'].max()).astype(str))
combination = list(itertools.product(list_date,list_ticker))

processed_full = (
    pd.DataFrame(combination, columns=["date", "tic"])
    .merge(processed, on=["date", "tic"], how="left")
    # keep only calendar dates that actually occur in the processed data
    .loc[lambda grid: grid["date"].isin(processed["date"])]
    .sort_values(["date", "tic"])
    .fillna(0)
)

# Display only: the re-indexed copy is not assigned, so `processed_full` keeps its original index labels.
processed_full.sort_values(['date','tic'],ignore_index=True)

Unnamed: 0,date,tic,open,high,low,close,volume,day,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma,vix,turbulence
0,2010-01-04,AAPL,7.622500,7.660714,7.585000,6.478998,493729600.0,0.0,0.000000,6.500441,6.468757,100.000000,66.666667,100.000000,6.478998,6.478998,20.040001,0.000000
1,2010-01-04,AMGN,56.630001,57.869999,56.560001,41.817791,5277400.0,0.0,0.000000,6.500441,6.468757,100.000000,66.666667,100.000000,41.817791,41.817791,20.040001,0.000000
2,2010-01-04,AXP,40.810001,41.099998,40.389999,33.300179,6894300.0,0.0,0.000000,6.500441,6.468757,100.000000,66.666667,100.000000,33.300179,33.300179,20.040001,0.000000
3,2010-01-04,BA,55.720001,56.389999,54.799999,43.777546,6186700.0,0.0,0.000000,6.500441,6.468757,100.000000,66.666667,100.000000,43.777546,43.777546,20.040001,0.000000
4,2010-01-04,CAT,57.650002,59.189999,57.509998,40.523598,7325600.0,0.0,0.000000,6.500441,6.468757,100.000000,66.666667,100.000000,40.523598,40.523598,20.040001,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99697,2023-08-30,UNH,493.989990,496.709991,490.290009,487.959290,2283400.0,2.0,-1.366173,510.656666,480.459697,49.573615,-100.696731,5.966966,497.893921,485.160452,13.880000,7.267617
99698,2023-08-30,V,246.419998,248.229996,246.050003,245.706894,4573300.0,2.0,1.805051,245.028929,235.307539,59.424620,241.957153,16.612777,239.150280,235.436313,13.880000,7.267617
99699,2023-08-30,VZ,34.889999,34.950001,34.549999,33.916065,15021400.0,2.0,0.042518,33.674198,31.455613,52.358194,184.246607,24.916800,32.757136,33.562109,13.880000,7.267617
99700,2023-08-30,WBA,25.629999,25.770000,25.450001,25.008093,5883700.0,2.0,-1.042668,29.841716,23.664549,34.911829,-117.483859,52.212555,27.544158,28.400918,13.880000,7.267617


In [11]:
# Show the technical-indicator column names imported from finrl.config.
INDICATORS

['macd',
 'boll_ub',
 'boll_lb',
 'rsi_30',
 'cci_30',
 'dx_30',
 'close_30_sma',
 'close_60_sma']

In [12]:
# Remove the DOW ticker from the universe.
# NOTE(review): presumably because it lacks price history for part of the period (such rows were filled with 0) - confirm.
processed_full = processed_full[processed_full['tic'] != 'DOW']

In [13]:
stock_dimension = len(processed_full.tic.unique())
# State length: 1 cash entry + (close price + shares held) per stock + one value per indicator per stock.
state_space = 1 + 2*stock_dimension + len(INDICATORS)*stock_dimension
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

Stock Dimension: 29, State Space: 291


In [14]:
# Proportional transaction cost of 0.001 per stock for both directions.
# Note: both names refer to the same list object.
buy_cost_list = sell_cost_list = [0.001] * stock_dimension
# Start with no shares in any stock.
num_stock_shares = [0] * stock_dimension

# Keyword arguments shared by every StockEnvMine instance the ensemble creates.
env_kwargs = {
    "hmax": 100,
    "initial_amount": 1000000,
    "num_stock_shares": num_stock_shares,
    "buy_cost_pct": buy_cost_list,
    "sell_cost_pct": sell_cost_list,
    "state_space": state_space,
    "stock_dim": stock_dimension,
    "tech_indicator_list": INDICATORS,
    "action_space": stock_dimension,
    "reward_scaling": 1e-4
}

In [15]:
# Rolling ensemble: 63 trade dates per validation window and 63 per rebalance (trading) window.
ensemble = EnsembleAgent(df=processed_full, train_period=[TRAIN_START_DATE, TRAIN_END_DATE], val_period=[TRADE_START_DATE, TRADE_END_DATE], rebalance_window=63, validation_window=63, env_args=env_kwargs)

In [16]:
# PPO hyperparameters passed to the ensemble in place of the library defaults.
PPO_PARAMS = {
    "n_steps": 2048,
    "ent_coef": 0.01,
    "learning_rate": 0.00025,
    "batch_size": 128,
}

In [17]:
# Only PPO hyperparameters are overridden; A2C and DDPG use the defaults chosen inside Agent,
# and every algorithm trains for the default number of timesteps.
summary = ensemble.train(PPO_kwargs=PPO_PARAMS)

Turbulence threshold:  203.40535487964027
Model training from: 2010-01-01 to 2023-01-03
A2C Training: 


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Using cuda device
---------------------------------------
| time/                 |             |
|    fps                | 149         |
|    iterations         | 100         |
|    time_elapsed       | 3           |
|    total_timesteps    | 500         |
| train/                |             |
|    entropy_loss       | -41.1       |
|    explained_variance | 0.642       |
|    learning_rate      | 0.0007      |
|    n_updates          | 99          |
|    policy_loss        | -40.6       |
|    reward             | -0.15942453 |
|    std                | 1           |
|    value_loss         | 1.11        |
---------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 191       |
|    iterations         | 200       |
|    time_elapsed       | 5         |
|    total_timesteps    | 1000      |
| train/                |           |
|    entropy_loss       | -41.2     |
|    explained_variance | -0.0425   |


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Turbulence threshold:  203.40535487964027
Model training from: 2010-01-01 to 2023-04-04
A2C Training: 
Using cuda device
---------------------------------------
| time/                 |             |
|    fps                | 268         |
|    iterations         | 100         |
|    time_elapsed       | 1           |
|    total_timesteps    | 500         |
| train/                |             |
|    entropy_loss       | -41.2       |
|    explained_variance | 0           |
|    learning_rate      | 0.0007      |
|    n_updates          | 99          |
|    policy_loss        | -48.8       |
|    reward             | -0.06479742 |
|    std                | 1           |
|    value_loss         | 2.52        |
---------------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 266         |
|    iterations         | 200         |
|    time_elapsed       | 3           |
|    total_timesteps    | 1000        |

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


--------------------------------------
| time/                 |            |
|    fps                | 263        |
|    iterations         | 100        |
|    time_elapsed       | 1          |
|    total_timesteps    | 500        |
| train/                |            |
|    entropy_loss       | -41.1      |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 99         |
|    policy_loss        | -62.3      |
|    reward             | -0.7534478 |
|    std                | 1          |
|    value_loss         | 3.14       |
--------------------------------------
--------------------------------------
| time/                 |            |
|    fps                | 265        |
|    iterations         | 200        |
|    time_elapsed       | 3          |
|    total_timesteps    | 1000       |
| train/                |            |
|    entropy_loss       | -41.2      |
|    explained_variance | 0          |
|    learning_rate      |

In [18]:
# One row per window: validation dates, the selected model and each algorithm's validation Sharpe ratio.
summary

Unnamed: 0,iteration,Start Date,End Date,model_order,a2c_sharpe,ddpg_sharpe,ppo_sharpe
0,126,2023-01-03,2023-04-04,ddpg,0.151331,0.307817,-0.122026
1,189,2023-04-04,2023-07-06,a2c,0.184861,0.147493,-0.026791
2,252,2023-07-06,2023-08-30,ddpg,0.268472,0.30356,0.068405
