### Import Necessary Packages ###

In [125]:
import pandas as pd
import numpy as np

from stable_baselines3 import A2C, PPO, DDPG, TD3, SAC, DQN
from stable_baselines3.common.logger import configure

from finrl.meta.preprocessor.preprocessors import data_split
from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv
from finrl.agents.stablebaselines3.models import DRLAgent 

from finrl import config
from finrl import config_tickers
from finrl.main import check_and_make_directories
from finrl.config import (
    DATA_SAVE_DIR,
    TRAINED_MODEL_DIR,
    TENSORBOARD_LOG_DIR,
    RESULTS_DIR,
    INDICATORS,
)
# Hand FinRL the four working directories imported above in a single call.
check_and_make_directories(
    [
        DATA_SAVE_DIR,
        TRAINED_MODEL_DIR,
        TENSORBOARD_LOG_DIR,
        RESULTS_DIR,
    ]
)

In [126]:
# Date windows for the experiment, written as "YYYY-MM-DD" strings.
TRAIN_START_DATE, TRAIN_END_DATE = '2014-06-01', '2021-01-01'
TEST_START_DATE, TEST_END_DATE = '2021-01-02', '2023-04-01'
# Bar-size label; it is not referenced by the cells visible in this section.
TIME_INTERVAL = '1D'

Load the stock data from the `datasets` directory

In [127]:
# Read the pre-processed price/indicator table from the local datasets folder.
data = pd.read_csv(filepath_or_buffer="datasets/processed_data.csv")

In [128]:
# Preview the loaded frame; long frames are shown truncated (first and last rows).
data

Unnamed: 0,date,tic,open,close,high,low,volume,amount,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma,turbulence
0,2014-06-03,sh600028,8.72,8.79,8.87,8.69,423628.0,2.132521e+08,0.000000,8.790000,8.790000,0.000000,-66.666667,100.000000,8.790000,8.790000,0.000000
1,2014-06-03,sh600029,3.87,3.86,3.89,3.86,161877.0,3.766733e+07,0.000000,8.790000,8.790000,0.000000,-66.666667,100.000000,3.860000,3.860000,0.000000
2,2014-06-03,sh600276,366.83,374.56,376.35,366.83,27404.0,9.013581e+07,0.000000,8.790000,8.790000,0.000000,-66.666667,100.000000,374.560000,374.560000,0.000000
3,2014-06-03,sh600309,408.06,407.64,412.45,407.43,46705.0,7.839471e+07,0.000000,8.790000,8.790000,0.000000,-66.666667,100.000000,407.640000,407.640000,0.000000
4,2014-06-03,sh600436,88.60,88.07,89.26,87.86,3512.0,2.705420e+07,0.000000,8.790000,8.790000,0.000000,-66.666667,100.000000,88.070000,88.070000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
65125,2023-04-28,sz002230,726.93,794.57,798.45,722.28,1064697.0,6.231633e+09,10.437358,888.264578,709.311422,55.666886,-14.914221,30.664523,777.236667,708.070333,89.449365
65126,2023-04-28,sz002352,178.19,176.54,179.66,175.82,153159.0,8.686714e+08,0.983021,182.392753,167.429247,52.808157,80.806878,25.694144,172.571000,172.061500,89.449365
65127,2023-04-28,sz002594,262.72,257.11,264.22,255.25,191945.0,4.933613e+09,0.315811,260.448325,241.664675,50.493915,142.661589,22.906747,249.163000,258.980333,89.449365
65128,2023-04-28,sz002714,864.07,853.28,865.79,848.82,225879.0,1.082817e+09,-5.187115,897.822024,834.556976,47.085182,-37.662762,2.986640,861.463000,869.143167,89.449365


In [129]:
# Slice the full table into the training window and the back-testing window.
train, test = (
    data_split(data, window_start, window_end)
    for window_start, window_end in (
        (TRAIN_START_DATE, TRAIN_END_DATE),
        (TEST_START_DATE, TEST_END_DATE),
    )
)

In [130]:
# Report the (rows, columns) size of the training split.
train.shape

(48240, 17)

In [131]:
# Number of distinct tickers present in the training split.
stock_dimension = len(train["tic"].unique())
# Observation length: one leading slot plus (2 + number of indicators) slots
# per stock (presumably cash, then price/holding/indicator values - confirm
# against the environment's state layout).
state_space = 1 + stock_dimension * (2 + len(INDICATORS))
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

Stock Dimension: 30, State Space: 301


In [132]:
# Per-stock cost rates for buying and selling. Two separate lists are built on
# purpose: a chained assignment (`a = b = [...]`) binds both names to ONE list
# object, so editing one side's costs in place would silently change the other.
buy_cost_list = [0.001] * stock_dimension
sell_cost_list = [0.001] * stock_dimension
# Initial share count for every stock: none held.
num_stock_shares = [0] * stock_dimension

# Keyword arguments forwarded to StockTradingEnv for the training environment.
# NOTE(review): "initial_amount" is 100000 here but 1000000 in the back-test
# cell further down - confirm the tenfold difference is intentional.
train_kwargs = {
    "hmax": 100,
    "initial_amount": 100000,
    "num_stock_shares": num_stock_shares,
    "buy_cost_pct": buy_cost_list,
    "sell_cost_pct": sell_cost_list,
    "state_space": state_space,
    "stock_dim": stock_dimension,
    "tech_indicator_list": INDICATORS,
    "action_space": stock_dimension,
    "reward_scaling": 1e-4
}

# Training environment built on the training split.
env_train = StockTradingEnv(df=train, **train_kwargs)

In [133]:
# Bind the DRL agent helper to the training environment.
agent = DRLAgent(env=env_train)

# PPO hyper-parameters handed to the model factory as `model_kwargs`.
PPO_PARAMS = dict(
    n_steps=2048,
    ent_coef=0.01,
    learning_rate=0.00025,
    batch_size=128,
)
model_ppo = agent.get_model("ppo", model_kwargs=PPO_PARAMS)

# Send training logs to <RESULTS_DIR>/ppo in stdout, CSV and TensorBoard form,
# then attach that logger to the model.
tmp_path = f"{RESULTS_DIR}/ppo"
new_logger_ppo = configure(tmp_path, ["stdout", "csv", "tensorboard"])
model_ppo.set_logger(new_logger_ppo)

{'n_steps': 2048, 'ent_coef': 0.01, 'learning_rate': 0.00025, 'batch_size': 128}
Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to results/ppo


In [134]:
# Train the PPO model for 50,000 timesteps, logging under the "ppo" name.
trained_ppo = agent.train_model(
    model=model_ppo,
    total_timesteps=50_000,
    tb_log_name='ppo',
)

-----------------------------------
| rollout/           |            |
|    ep_len_mean     | 1.61e+03   |
|    ep_rew_mean     | 132        |
| time/              |            |
|    fps             | 168        |
|    iterations      | 1          |
|    time_elapsed    | 12         |
|    total_timesteps | 2048       |
| train/             |            |
|    reward          | 0.08663344 |
-----------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.61e+03     |
|    ep_rew_mean          | 79.4         |
| time/                   |              |
|    fps                  | 170          |
|    iterations           | 2            |
|    time_elapsed         | 24           |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.015913932  |
|    clip_fraction        | 0.208        |
|    clip_range           | 0.2          |
|    entr

In [None]:
# Persist the trained PPO model under the trained-model directory.
trained_ppo.save(f"{TRAINED_MODEL_DIR}/ppo")

In [135]:
# Keyword arguments forwarded to StockTradingEnv for the back-test environment.
# NOTE(review): the starting cash here (1000000) is ten times the value used
# for the training environment (100000) - confirm this is intended.
test_kwargs = dict(
    hmax=100,
    initial_amount=1000000,
    num_stock_shares=num_stock_shares,
    buy_cost_pct=buy_cost_list,
    sell_cost_pct=sell_cost_list,
    state_space=state_space,
    stock_dim=stock_dimension,
    tech_indicator_list=INDICATORS,
    action_space=stock_dimension,
    reward_scaling=1e-4,
)

# Back-test environment on the test split, with a turbulence threshold of 70.
env_test = StockTradingEnv(df=test, turbulence_threshold=70, **test_kwargs)

In [136]:
# Run the trained policy through the test environment; the call yields the
# account-value history and the per-step actions as two separate frames.
df_account_value_ppo, df_actions_ppo = DRLAgent.DRL_prediction(
    environment=env_test,
    model=trained_ppo,
)

hit end!


In [137]:
# Combine the account-value history with the per-step actions, keeping only
# the dates that appear in both frames.
merged_df = df_account_value_ppo.merge(df_actions_ppo, on="date", how="inner")

# Write under RESULTS_DIR (the directory created at start-up and used by the
# PPO logger) instead of a hard-coded "results/" folder, so the export still
# works when the configured results directory has a different name.
merged_df.to_csv(RESULTS_DIR + "/result_ppo.csv")

In [138]:
# Display the merged account-value / action table built in the previous cell.
merged_df

Unnamed: 0,date,account_value,sh600028,sh600029,sh600276,sh600309,sh600436,sh600519,sh600887,sh600900,...,sh601939,sz000002,sz000725,sz000858,sz000895,sz002230,sz002352,sz002594,sz002714,sz300015
0,2021-01-04,1.000000e+06,4,6,11,0,13,0,0,9,...,0,0,0,2,11,0,5,18,20,0
1,2021-01-05,1.005347e+06,4,6,11,0,13,0,0,9,...,0,0,0,2,11,0,5,18,20,0
2,2021-01-06,1.006488e+06,-8,-12,-22,0,-26,0,0,-18,...,0,0,0,-4,-22,0,-10,-36,-40,0
3,2021-01-07,1.006222e+06,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,2021-01-08,1.006222e+06,4,6,11,0,13,0,0,9,...,0,0,0,2,11,0,5,18,20,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
538,2023-03-24,6.655110e+05,-9,32,0,0,0,0,0,0,...,0,7,24,0,-5,0,0,2,-22,1
539,2023-03-27,6.722534e+05,-12,35,0,0,0,0,0,0,...,0,9,22,0,-5,0,-1,0,-25,0
540,2023-03-28,6.701566e+05,-5,35,0,0,0,0,0,0,...,0,9,22,0,-5,0,-1,0,-25,0
541,2023-03-29,6.721989e+05,0,35,0,0,0,0,0,0,...,0,9,0,0,-5,0,-1,0,-25,0
