In [1]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import datetime

from finrl.config import config
from finrl.marketdata.yahoodownloader import YahooDownloader
from finrl.preprocessing.preprocessors import FeatureEngineer
from finrl.preprocessing.data import data_split
from finrl.env.env_stocktrading import StockTradingEnv
from finrl.model.models import DRLAgent
from finrl.trade.backtest import backtest_stats, backtest_plot, get_daily_return, get_baseline
from stable_baselines3.common.vec_env import VecCheckNan
from pprint import pprint
import sys
import itertools
import os
today = datetime.datetime.today()
# os.environ['CUDA_VISIBLE_DEVICES'] = ARGS.gpu

# Load the pre-processed dataset from disk and cut it into the two windows
# used by this notebook: start_date..end_date for training and
# end_date..trade_date for trading.
processed_full = pd.read_csv("data/datasets/processed_sz50.csv")
start_date, end_date, trade_date = '2009-01-01', '2019-01-01', '2021-01-01'
# NOTE: a later cell defines a function that is also named `train`, which
# rebinds this name; re-run this cell if the DataFrame is needed again.
train = data_split(processed_full, start_date, end_date)
trade = data_split(processed_full, end_date, trade_date)
print(len(train), len(trade), sep="\n")

# State size = 1 + 2 values per ticker + one value per technical indicator
# per ticker, where the ticker count is the number of distinct `tic` values.
stock_dimension = len(train["tic"].unique())
state_space = 1 + stock_dimension * (2 + len(config.TECHNICAL_INDICATORS_LIST))
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")


# Keep rows with start_date <= date < end_date, reduce them to one row per
# date, and use the 1.0-quantile (i.e. the maximum) of the `turbulence`
# column over that window as the threshold.
data_turbulence = processed_full.loc[
    (processed_full["date"] >= start_date) & (processed_full["date"] < end_date)
]
insample_turbulence = data_turbulence.drop_duplicates(subset=["date"])
turbulence_threshold = np.quantile(insample_turbulence["turbulence"].to_numpy(), 1)
print("turbulence_threshold:", turbulence_threshold)


# Keyword arguments shared by every StockTradingEnv built in this notebook
# (the training env here and the trading env in a later cell).
env_kwargs = dict(
    hmax=100,
    initial_amount=1000000,
    buy_cost_pct=0.001,
    sell_cost_pct=0.001,
    state_space=state_space,
    stock_dim=stock_dimension,
    tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
    action_space=stock_dimension,
    reward_scaling=1e-4,
)

# Build the training environment on the training split, wrap it for
# stable-baselines3, and create the agent that the helper functions in the
# following cells use as a global.
e_train_gym = StockTradingEnv(df=train, **env_kwargs)

env_train, _ = e_train_gym.get_sb_env()
print(type(env_train))
agent = DRLAgent(env=env_train)


  'Module "zipline.assets" not found; multipliers will not be applied'
EPISODE:1:   0%|                                       | 0/2431 [00:00<?, ?it/s]

121550
24350
Stock Dimension: 50, State Space: 301
turbulence_threshold: 2183.354543800645
<class 'stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv'>


In [2]:

def get_model(model_name):
    """Build an untrained model for ``model_name`` through the global ``agent``.

    Parameters
    ----------
    model_name : str
        One of ``"a2c"``, ``"ddpg"``, ``"ppo"``, ``"td3"`` or ``"sac"``.

    Returns
    -------
    Whatever ``agent.get_model`` returns. ``"a2c"`` and ``"ddpg"`` are
    requested without ``model_kwargs``; the other algorithms are requested
    with the hyper-parameters listed in this function.

    Raises
    ------
    ValueError
        If ``model_name`` is not one of the supported names.
    """
    # These algorithms are built without any explicit hyper-parameters.
    # ("a2c" is the name used by the training calls in this notebook.)
    if model_name in ("a2c", "ddpg"):
        return agent.get_model(model_name)

    # Hyper-parameters passed as model_kwargs, keyed by algorithm name.
    tuned_kwargs = {
        "ppo": {
            "n_steps": 2048,
            "ent_coef": 0.01,
            "learning_rate": 0.00025,
            "batch_size": 128,
        },
        "td3": {
            "batch_size": 100,
            "buffer_size": 1000000,
            "learning_rate": 0.001,
        },
        "sac": {
            "batch_size": 128,
            "buffer_size": 1000000,
            "learning_rate": 0.0001,
            "learning_starts": 100,
            "ent_coef": "auto_0.1",
        },
    }

    # Fail loudly on unknown names instead of falling through to an
    # unbound local variable.
    if model_name not in tuned_kwargs:
        supported = ["a2c", "ddpg"] + sorted(tuned_kwargs)
        raise ValueError(
            "Unsupported model_name {!r}; expected one of {}".format(model_name, supported)
        )

    return agent.get_model(model_name, model_kwargs=tuned_kwargs[model_name])


# Path prefix for saved model checkpoints. The helpers below build file names
# with plain string concatenation (model_path + model_name), so the trailing
# slash is required.
model_path = "./trained_models/"
def train(model_name, total_timesteps):
    """Train the algorithm called ``model_name`` and save the result.

    The model is created with ``get_model``, trained through the global
    ``agent`` for ``total_timesteps`` steps (using ``model_name`` as the
    TensorBoard log name) and saved at ``model_path + model_name``.

    Returns the trained model object produced by ``agent.train_model``.
    """
    untrained = get_model(model_name)
    fitted = agent.train_model(
        model=untrained,
        tb_log_name=model_name,
        total_timesteps=total_timesteps,
    )
    save_target = model_path + model_name
    fitted.save(save_target)
    return fitted

def load(model_name):
    """Load a previously saved model through the global ``agent``.

    The checkpoint location passed to ``agent.load_model`` is
    ``model_path + model_name``; the loaded object is returned unchanged.
    """
    loaded = agent.load_model(model_name, model_path + model_name)
    return loaded


def load_or_train(model_name, total_timesteps = 30000):
    """Return a saved model if its checkpoint exists, otherwise train one.

    A checkpoint is looked up at ``model_path + model_name + ".zip"``. When it
    is present the model is loaded (and a "load <name>" line is printed);
    ``total_timesteps`` is only used when training is needed.
    """
    checkpoint_file = model_path + model_name + ".zip"
    # No checkpoint on disk: train from scratch.
    if not os.path.exists(checkpoint_file):
        return train(model_name, total_timesteps)
    print("load " + model_name)
    return load(model_name)

#train("a2c", 100000)
#train("ddpg", 50000)
#train("ppo", 50000)
#train("td3", 30000)
#train("sac", 80000)

In [3]:
# train model
#train("td3", 10)
#m = load_or_train("ddpg", 10)
#m = load_or_train("ppo", 50000)
#m = load_or_train("ddpg", 50000)
# m = load_or_train("sac", 80000)
# Obtain the DDPG model. load_or_train only uses the timestep count (2 here)
# when no saved checkpoint exists; otherwise the checkpoint is loaded.
m = load_or_train("ddpg", 2)
print(m)


# simulate for trade
# The trading environment reuses env_kwargs but runs on the trade split and
# additionally receives the turbulence threshold computed earlier.
e_trade_gym = StockTradingEnv(
    df=trade,
    turbulence_threshold=turbulence_threshold,
    **env_kwargs
)
df_account_value, df_actions = DRLAgent.DRL_prediction(model=m, environment=e_trade_gym)

print(df_account_value, df_actions, sep="\n")


load ddpg



EPISODE:1:   0%|                                        | 0/487 [00:00<?, ?it/s][A

EPISODE:2:   0%|                                        | 0/487 [00:00<?, ?it/s][A[A

<stable_baselines3.ddpg.ddpg.DDPG object at 0x7f674754b588>




EPISODE:2:   0%|                                | 1/487 [00:00<01:46,  4.57it/s][A[A

EPISODE:1:   0%|                                        | 0/487 [00:00<?, ?it/s][A[A


EPISODE:2:  15%|████▋                          | 74/487 [00:00<00:44,  9.21it/s][A[A

EPISODE:2:  23%|██████▉                       | 112/487 [00:00<00:28, 13.01it/s][A[A

EPISODE:2:  31%|█████████▏                    | 149/487 [00:00<00:18, 18.31it/s][A[A

EPISODE:2:  38%|███████████▍                  | 186/487 [00:00<00:11, 25.61it/s][A[A

EPISODE:2:  46%|█████████████▋                | 223/487 [00:00<00:07, 35.53it/s][A[A

EPISODE:2:  53%|████████████████              | 260/487 [00:00<00:04, 48.73it/s][A[A

EPISODE:2:  61%|██████████████████▎           | 297/487 [00:01<00:02, 65.81it/s][A[A

EPISODE:2:  69%|████████████████████▌         | 334/487 [00:01<00:01, 87.30it/s][A[A

EPISODE:2:  76%|██████████████████████       | 371/487 [00:01<00:01, 113.17it/s][A[A

EPISODE:2:  84%|█████████████

hit end!
           date  account_value
0    2019-01-02   1.000000e+06
1    2019-01-03   9.988078e+05
2    2019-01-04   1.003929e+06
3    2019-01-07   1.004908e+06
4    2019-01-08   1.003308e+06
..          ...            ...
482  2020-12-25   2.612790e+06
483  2020-12-28   2.649823e+06
484  2020-12-29   2.661679e+06
485  2020-12-30   2.739417e+06
486  2020-12-31   2.813198e+06

[487 rows x 2 columns]
            sh.600000  sh.600009  sh.600016  sh.600028  sh.600030  sh.600031  \
date                                                                           
2019-01-02        100          0          0          0          0        100   
2019-01-03        100          0          0          0          0        100   
2019-01-04        100          0          0          0          0        100   
2019-01-07        100          0          0          0          0        100   
2019-01-08        100          0          0          0          0        100   
...               ...        ...   

In [4]:

# Compute backtest statistics for the simulated account values and write them
# to a time-stamped CSV file inside config.RESULTS_DIR.
print("==============Get Backtest Results===========")
now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')
perf_stats_all = backtest_stats(account_value=df_account_value)
perf_stats_all = pd.DataFrame(perf_stats_all)
# DataFrame.to_csv does not create missing parent directories, so make sure
# the results folder exists before writing (no-op when it is already there).
os.makedirs("./"+config.RESULTS_DIR, exist_ok=True)
perf_stats_all.to_csv("./"+config.RESULTS_DIR+"/perf_stats_all_"+now+'.csv')

Annual return          0.707813
Cumulative returns     1.813198
Annual volatility      0.273856
Sharpe ratio           2.097305
Calmar ratio           3.742701
Stability              0.922184
Max drawdown          -0.189118
Omega ratio            1.434612
Sortino ratio          3.232045
Skew                        NaN
Kurtosis                    NaN
Tail ratio             1.309267
Daily value at risk   -0.032223
dtype: float64
