### Import Necessary Packages ###

In [1]:
import pandas as pd

from skopt import gp_minimize
from skopt.space import Real, Integer
from skopt.utils import use_named_args

from stable_baselines3 import A2C, PPO, DDPG, TD3, SAC
from stable_baselines3.common.logger import configure

from finrl.meta.preprocessor.preprocessors import data_split
from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv
from finrl.agents.stablebaselines3.models import DRLAgent, DRLEnsembleAgent
from finrl.plot import backtest_stats

from finrl import config
from finrl import config_tickers
from finrl.main import check_and_make_directories
from finrl.config import (
    DATA_SAVE_DIR,
    TRAINED_MODEL_DIR,
    TENSORBOARD_LOG_DIR,
    RESULTS_DIR,
)
# Hand the FinRL working directories (data, trained models, TensorBoard logs,
# results) to the helper that checks for / creates them.
check_and_make_directories(
    [
        DATA_SAVE_DIR,
        TRAINED_MODEL_DIR,
        TENSORBOARD_LOG_DIR,
        RESULTS_DIR,
    ]
)

In [2]:
# Date windows ("YYYY-MM-DD" strings) used to split the data into train / test.
TRAIN_START_DATE, TRAIN_END_DATE = '2014-06-01', '2021-01-01'
TEST_START_DATE, TEST_END_DATE = '2021-01-02', '2023-04-01'
# Sampling-interval label (not referenced by the other cells visible here).
TIME_INTERVAL = '1D'

# Technical-indicator names; passed to the trading environment as
# `tech_indicator_list` and counted when sizing the state space.
INDICATORS = [
    "macd", "adx", "rsi_30",
    "boll_ub", "boll_lb",
    "close_30_sma", "close_60_sma",
]

Load the processed stock data from the `datasets` directory and split it into training and testing sets.

In [3]:
# Read the pre-processed dataset from disk.
data = pd.read_csv("datasets/processed_data.csv")

# Cut the training and testing frames out of the full dataset, one per
# configured date window (train first, then test).
train, test = (
    data_split(data, window_start, window_end)
    for window_start, window_end in (
        (TRAIN_START_DATE, TRAIN_END_DATE),
        (TEST_START_DATE, TEST_END_DATE),
    )
)

Calculate all the parameters needed in the training process

*   **stock_dimension**: the number of unique stocks (tickers) in the training data
*   **state_space**: the dimension of the state space
*   **action_space**: the dimension of the action space (set equal to `stock_dimension` when the environment is configured)
*   **buy_cost_list**: a list of transaction cost percentages for buying each stock
*   **sell_cost_list**: a list of transaction cost percentages for selling each stock
*   **num_stock_shares**: the initial number of shares held for each stock (all zeros here)

In [4]:
# Number of distinct tickers in the training frame.
stock_dimension = len(train.tic.unique())

# Size of the environment's state vector: one scalar, two values per stock and
# one value per (indicator, stock) pair.
# NOTE(review): presumably cash balance + price and holdings per stock —
# confirm against StockTradingEnv.
state_space = 1 + 2 * stock_dimension + len(INDICATORS) * stock_dimension

# Per-stock transaction cost rates. Built as two separate lists: the previous
# chained assignment bound both names to the SAME list object, so an in-place
# change to one would silently have changed the other.
buy_cost_list = [0.001] * stock_dimension
sell_cost_list = [0.001] * stock_dimension

# One zero entry per stock, passed to the environment as `num_stock_shares`.
num_stock_shares = [0] * stock_dimension

In [5]:
# Algorithm key -> stable-baselines3 class.
MODELS = dict(a2c=A2C, ddpg=DDPG, td3=TD3, sac=SAC, ppo=PPO)


def _on_policy_dims():
    """Return fresh search dimensions shared by the A2C and PPO spaces."""
    return [
        Integer(10, 200, name="n_steps"),
        Real(0.001, 0.1, prior="log-uniform", name="ent_coef"),
        Real(1e-5, 1e-2, prior="log-uniform", name="learning_rate"),
        Integer(32, 256, name="batch_size"),
    ]


def _off_policy_dims():
    """Return fresh search dimensions shared by the DDPG, TD3 and SAC spaces."""
    return [
        Integer(10000, 1000000, name="buffer_size"),
        Real(1e-5, 1e-2, prior="log-uniform", name="learning_rate"),
        Integer(32, 256, name="batch_size"),
        Real(0.001, 0.1, prior="log-uniform", name="tau"),
    ]


# Parameter spaces for each model. Every space gets its own dimension objects;
# nothing is shared between two spaces.
a2c_space = _on_policy_dims()
ddpg_space = _off_policy_dims()
td3_space = _off_policy_dims() + [
    Real(0.01, 0.5, prior="log-uniform", name="policy_noise"),
    Real(0.01, 0.5, prior="log-uniform", name="noise_clip"),
    Integer(1, 10, name="policy_freq"),
]
sac_space = _off_policy_dims() + [
    Real(0.001, 0.1, prior="log-uniform", name="ent_coef"),
]
ppo_space = _on_policy_dims()

# Lookup used by the objective functions and the optimiser calls.
param_spaces = dict(
    a2c=a2c_space,
    ppo=ppo_space,
    sac=sac_space,
    td3=td3_space,
    ddpg=ddpg_space,
)

# Keyword arguments shared by every StockTradingEnv built in this notebook.
env_kwargs = dict(
    hmax=100,
    initial_amount=100000,
    num_stock_shares=num_stock_shares,
    buy_cost_pct=buy_cost_list,
    sell_cost_pct=sell_cost_list,
    state_space=state_space,
    stock_dim=stock_dimension,
    tech_indicator_list=INDICATORS,
    action_space=stock_dimension,
    reward_scaling=1e-4,
)

# Training environment over the training frame.
stock_env = StockTradingEnv(df=train, **env_kwargs)

### Helper functions ###

In [6]:
def model_train(model_name, env, model_kwargs, total_timesteps=50000):
    """Build one DRL model through ``DRLAgent`` and train it on ``env``.

    Args:
        model_name: Algorithm key passed to ``DRLAgent.get_model``; also used
            as the log sub-directory name and the TensorBoard run name.
        env: Environment handed to ``DRLAgent`` for training.
        model_kwargs: Hyperparameters forwarded to ``DRLAgent.get_model``.
        total_timesteps: Training budget passed to ``DRLAgent.train_model``.
            Defaults to the previously hard-coded 50000.

    Returns:
        The value returned by ``DRLAgent.train_model`` (the trained model).
    """
    import os  # local import so the helper does not depend on other cells

    agent = DRLAgent(env)
    model = agent.get_model(model_name, model_kwargs=model_kwargs)

    # Log under RESULTS_DIR/<model_name>. Plain string concatenation yields a
    # sibling directory such as "resultsa2c" whenever RESULTS_DIR has no
    # trailing separator.
    log_dir = os.path.join(RESULTS_DIR, str(model_name))
    model.set_logger(configure(log_dir, ["tensorboard", "stdout"]))

    return agent.train_model(
        model=model,
        tb_log_name=model_name,
        total_timesteps=total_timesteps,
    )

def model_eval(model_name, env, trained_model):
    """Backtest ``trained_model`` on ``env`` and return the value to minimise.

    The backtest statistics are printed under a ``model_name`` banner. The
    returned value is the negated sum of the "Cumulative returns" and
    "Sharpe ratio" entries, so a lower value means a better model.
    """
    account_value, _ = DRLAgent.DRL_prediction(
        model=trained_model,
        environment=env,
    )
    stats = backtest_stats(account_value=account_value)

    print(f"=============={model_name} Results===========")
    print(stats)

    # Tuning metric: cumulative return plus Sharpe ratio, negated for a minimiser.
    return -(stats['Cumulative returns'] + stats['Sharpe ratio'])

In [7]:
@use_named_args(param_spaces["a2c"])
def evaluate_a2c(**params):
    """Objective for tuning A2C: train on ``stock_env``, backtest on ``test``.

    Args:
        **params: One sampled point of the "a2c" search space, keyed by
            dimension name.

    Returns:
        The value returned by ``model_eval`` (lower is better).
    """
    # stable-baselines3's A2C constructor has no `batch_size` argument, so
    # that dimension of the search space is accepted but not forwarded;
    # passing it would raise a TypeError.
    model_kwargs = {
        "n_steps": params["n_steps"],
        "ent_coef": params["ent_coef"],
        "learning_rate": params["learning_rate"],
    }
    trained_model = model_train("a2c", stock_env, model_kwargs)
    env = StockTradingEnv(df=test, turbulence_threshold=70, **env_kwargs)
    # Label the printed results with this algorithm (previously "ppo").
    return model_eval("a2c", env, trained_model)

@use_named_args(param_spaces["ddpg"])
def evaluate_ddpg(**params):
    """Objective for tuning DDPG: train on ``stock_env``, backtest on ``test``.

    Args:
        **params: One sampled point of the "ddpg" search space, keyed by
            dimension name.

    Returns:
        The value returned by ``model_eval`` (lower is better).
    """
    model_kwargs = {
        "buffer_size": params["buffer_size"],
        "learning_rate": params["learning_rate"],
        "batch_size": params["batch_size"],
        "tau": params["tau"],
    }
    trained_model = model_train("ddpg", stock_env, model_kwargs)
    env = StockTradingEnv(df=test, turbulence_threshold=70, **env_kwargs)
    # Label the printed results with this algorithm (previously "ppo").
    return model_eval("ddpg", env, trained_model)

@use_named_args(param_spaces["td3"])
def evaluate_td3(**params):
    """Objective for tuning TD3: train on ``stock_env``, backtest on ``test``.

    Args:
        **params: One sampled point of the "td3" search space, keyed by
            dimension name.

    Returns:
        The value returned by ``model_eval`` (lower is better).
    """
    model_kwargs = {
        "buffer_size": params["buffer_size"],
        "learning_rate": params["learning_rate"],
        "batch_size": params["batch_size"],
        "tau": params["tau"],
        # The search space uses the TD3 paper's names; stable-baselines3's TD3
        # constructor calls these target_policy_noise / target_noise_clip /
        # policy_delay and rejects the paper names with a TypeError.
        "target_policy_noise": params["policy_noise"],
        "target_noise_clip": params["noise_clip"],
        "policy_delay": params["policy_freq"],
    }
    trained_model = model_train("td3", stock_env, model_kwargs)
    env = StockTradingEnv(df=test, turbulence_threshold=70, **env_kwargs)
    # Label the printed results with this algorithm (previously "ppo").
    return model_eval("td3", env, trained_model)

@use_named_args(param_spaces["sac"])
def evaluate_sac(**params):
    """Objective for tuning SAC: train on ``stock_env``, backtest on ``test``.

    Args:
        **params: One sampled point of the "sac" search space, keyed by
            dimension name.

    Returns:
        The value returned by ``model_eval`` (lower is better).
    """
    model_kwargs = {
        "buffer_size": params["buffer_size"],
        "learning_rate": params["learning_rate"],
        "batch_size": params["batch_size"],
        "tau": params["tau"],
        "ent_coef": params["ent_coef"],
    }
    trained_model = model_train("sac", stock_env, model_kwargs)
    env = StockTradingEnv(df=test, turbulence_threshold=70, **env_kwargs)
    # Label the printed results with this algorithm (previously "ppo").
    return model_eval("sac", env, trained_model)

@use_named_args(param_spaces["ppo"])
def evaluate_ppo(**params):
    """Objective for tuning PPO: train on ``stock_env``, backtest on ``test``.

    ``params`` holds one sampled point of the "ppo" search space keyed by
    dimension name; the return value is whatever ``model_eval`` reports
    (lower is better).
    """
    tuned_keys = ("n_steps", "ent_coef", "learning_rate", "batch_size")
    ppo_kwargs = {key: params[key] for key in tuned_keys}

    fitted = model_train("ppo", stock_env, ppo_kwargs)
    backtest_env = StockTradingEnv(df=test, turbulence_threshold=70, **env_kwargs)
    return model_eval("ppo", backtest_env, fitted)

In [None]:
# Tune each algorithm with gp_minimize. Every search evaluates its objective
# n_calls times, and each evaluation trains and backtests a model.

# The A2C search must use the A2C objective; it previously called evaluate_ppo,
# so the "a2c" result actually described PPO models.
# NOTE(review): stable-baselines3's A2C takes no `batch_size` argument — make
# sure evaluate_a2c does not forward that dimension to the model.
a2c_res = gp_minimize(evaluate_a2c, param_spaces["a2c"], n_calls=50)
print("Minimum parameter:", a2c_res.x)
print("Minimum objective: ", a2c_res.fun)

ddpg_res = gp_minimize(evaluate_ddpg, param_spaces["ddpg"], n_calls=50)
print("Minimum parameter:", ddpg_res.x)
print("Minimum objective: ", ddpg_res.fun)

td3_res = gp_minimize(evaluate_td3, param_spaces["td3"], n_calls=50)
print("Minimum parameter:", td3_res.x)
print("Minimum objective: ", td3_res.fun)

sac_res = gp_minimize(evaluate_sac, param_spaces["sac"], n_calls=50)
print("Minimum parameter:", sac_res.x)
print("Minimum objective: ", sac_res.fun)

ppo_res = gp_minimize(evaluate_ppo, param_spaces["ppo"], n_calls=50)
print("Minimum parameter:", ppo_res.x)
print("Minimum objective: ", ppo_res.fun)

# Persist one labelled line per algorithm. The previous version wrote all ten
# values back to back with no separators, so the file could not be read back.
tuning_results = [
    ("a2c", a2c_res),
    ("ddpg", ddpg_res),
    ("td3", td3_res),
    ("sac", sac_res),
    ("ppo", ppo_res),
]
with open("res.txt", "w") as output:
    for tuned_name, tuned_res in tuning_results:
        output.write(f"{tuned_name}: params={tuned_res.x} objective={tuned_res.fun}\n")

### Ensemble DRL Agent ###

In [None]:
rebalance_window = 63  # number of days between model retrainings
validation_window = 63  # number of days used for validation and for trading (e.g. 63 means both periods last 63 days)

# The agent is given both a training period and a validation/test period, so
# `df` has to cover both. `train` ends at TRAIN_END_DATE and therefore holds no
# rows inside val_test_period; pass the full `data` frame instead.
# NOTE(review): env_kwargs is forwarded unchanged — confirm that the installed
# DRLEnsembleAgent accepts every key (e.g. `num_stock_shares`) — TODO confirm.
ensemble_agent = DRLEnsembleAgent(
    df=data,
    train_period=(TRAIN_START_DATE, TRAIN_END_DATE),
    val_test_period=(TEST_START_DATE, TEST_END_DATE),
    rebalance_window=rebalance_window,
    validation_window=validation_window,
    **env_kwargs,
)

Set the hyperparameters

In [None]:
# Hyperparameters for the three ensemble members.
A2C_model_kwargs = dict(n_steps=5, ent_coef=0.005, learning_rate=0.0007)

PPO_model_kwargs = dict(
    ent_coef=0.01,
    n_steps=2048,
    learning_rate=0.00025,
    batch_size=128,
)

# The "action_noise": "ornstein_uhlenbeck" option is intentionally left disabled.
DDPG_model_kwargs = dict(
    buffer_size=10_000,
    learning_rate=0.0005,
    batch_size=64,
)

# Timestep budget for each ensemble member, keyed by algorithm.
timesteps_dict = dict(a2c=10_000, ppo=10_000, ddpg=10_000)

Start training with ensemble DRL agents

In [None]:
# Run the ensemble strategy with the per-algorithm hyperparameters and timestep
# budgets defined above, keeping the returned summary.
df_summary = ensemble_agent.run_ensemble_strategy(
    A2C_model_kwargs,
    PPO_model_kwargs,
    DDPG_model_kwargs,
    timesteps_dict,
)