In [36]:
#Installing FinRL
# %%capture
# !pip install git+https://github.com/AI4Finance-LLC/FinRL-Library.git
  
# #Installing Optuna
# !pip install optuna
# !pip install dm_tree
# !pip install ray[tune]

In [9]:
#Importing the libraries
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
# matplotlib.use('Agg')
import datetime
import optuna
%matplotlib inline
from finrl.apps import config
from optuna.integration import PyTorchLightningPruningCallback
from finrl.neo_finrl.preprocessor.yahoodownloader import YahooDownloader
from finrl.neo_finrl.preprocessor.preprocessors import FeatureEngineer, data_split
from finrl.neo_finrl.env_stock_trading.env_stocktrading import StockTradingEnv
from finrl.neo_finrl.env_stock_trading.env_stocktrading_np import StockTradingEnv as StockTradingEnv_numpy
from finrl.drl_agents.stablebaselines3.models import DRLAgent
from finrl.drl_agents.rllib.models import DRLAgent as DRLAgent_rllib
from finrl.neo_finrl.data_processor import DataProcessor
import joblib
from finrl.plot import backtest_stats, backtest_plot, get_daily_return, get_baseline
import ray
from pprint import pprint

import sys
sys.path.append("../FinRL-Library")

import itertools

import os
# Make sure every output directory named in the FinRL config exists.
# exist_ok=True keeps the cell idempotent under Restart & Run All and removes
# the check-then-create race of a separate os.path.exists() test.
for _dir_name in (
    config.DATA_SAVE_DIR,
    config.TRAINED_MODEL_DIR,
    config.TENSORBOARD_LOG_DIR,
    config.RESULTS_DIR,
):
    os.makedirs("./" + _dir_name, exist_ok=True)

  'Module "zipline.assets" not found; multipliers will not be applied'


In [10]:
%%capture
## Collecting and preprocessing data
#Custom ticker list dataframe download
ticker_list = config.DOW_30_TICKER

# Fetch the raw price history for every ticker in the list.
df = YahooDownloader(
    start_date='2009-01-01',
    end_date='2021-10-01',
    ticker_list=ticker_list,
).fetch_data()

# Feature engineering: technical indicators, VIX and turbulence are enabled,
# user-defined features are not.
fe = FeatureEngineer(
    use_technical_indicator=True,
    tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
    use_vix=True,
    use_turbulence=True,
    user_defined_feature=False,
)
processed = fe.preprocess_data(df)

# Every (calendar date, ticker) pair between the first and last processed date.
list_ticker = processed["tic"].unique().tolist()
list_date = list(
    pd.date_range(processed['date'].min(), processed['date'].max()).astype(str)
)
combination = list(itertools.product(list_date, list_ticker))

# Left-join the processed rows onto the full grid, keep only dates that occur
# in the processed data, order by (date, tic) and zero-fill what is missing.
processed_full = (
    pd.DataFrame(combination, columns=["date", "tic"])
    .merge(processed, on=["date", "tic"], how="left")
    .loc[lambda grid: grid['date'].isin(processed['date'])]
    .sort_values(['date', 'tic'])
    .fillna(0)
)

In [37]:
## Setup datasets

# View a sample dataframe
# processed_full.sort_values(['date','tic'],ignore_index=True).head(10)

# Split into train and trade datasets
# train = data_split(processed_full, '2009-01-01','2020-07-01')
# trade = data_split(processed_full, '2020-07-01','2021-10-31')

# Split the processed frame into a training window and a trading (evaluation) window.
train = data_split(processed_full, '2020-01-01','2020-12-31')
trade = data_split(processed_full, '2021-01-01','2021-10-31')
print(f"Training data size {len(train)}")
print(f"Trading data size {len(trade)}")

# A bare `frame.head()` that is not the last statement of the cell is not
# rendered under IPython's default settings, so display the samples explicitly.
print ("Training data sample")
display(train.head())
print ("Trading data sample")
display(trade.head())

# State vector size: 1 + 2 entries per stock + one entry per stock per technical indicator.
stock_dimension = len(train.tic.unique())
state_space = 1 + 2*stock_dimension + len(config.TECHNICAL_INDICATORS_LIST)*stock_dimension
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")


Training data size 7308
Trading data size 5423
Training data sample


Unnamed: 0,date,tic,open,high,low,close,volume,day,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma,vix,turbulence
0,2020-01-02,AAPL,74.059998,75.150002,73.797501,73.988464,135480400.0,3.0,2.154897,73.908411,63.642892,76.464858,176.83605,58.795784,67.550025,64.192412,12.47,23.272415
0,2020-01-02,AMGN,243.0,243.190002,238.979996,226.630829,2088000.0,3.0,3.86482,233.019515,217.936161,68.821614,66.199773,38.323475,223.093698,209.903027,12.47,23.272415
0,2020-01-02,AXP,124.660004,126.269997,124.230003,122.641548,2708000.0,3.0,1.439061,124.543827,114.801936,59.231287,97.998034,28.072591,118.363037,116.619092,12.47,23.272415
0,2020-01-02,BA,328.549988,333.350006,327.700012,331.348572,4544400.0,3.0,-7.198674,354.759862,316.044056,44.297973,-77.659006,26.88983,345.004659,349.218463,12.47,23.272415
0,2020-01-02,CAT,149.0,150.550003,147.979996,143.137772,3311900.0,3.0,1.655272,143.835439,133.401873,61.562714,146.169559,26.046421,137.988371,134.240935,12.47,23.272415


Trading data sample


Unnamed: 0,date,tic,open,high,low,close,volume,day,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma,vix,turbulence
0,2021-01-04,AAPL,133.520004,133.610001,126.760002,128.617111,143301900.0,0.0,3.44119,136.578796,117.958036,56.613043,64.551755,8.436326,124.117176,120.070758,26.969999,37.684545
0,2021-01-04,AMGN,231.25,231.25,223.669998,219.858322,3088200.0,0.0,-0.412324,225.892903,215.124502,48.078722,28.665878,1.082054,219.309955,221.072275,26.969999,37.684545
0,2021-01-04,AXP,121.300003,121.800003,116.849998,116.679779,3472100.0,0.0,0.921035,123.386761,112.696052,54.746515,-8.552931,3.61834,117.849149,110.284337,26.969999,37.684545
0,2021-01-04,BA,210.0,210.199997,202.490005,202.720001,21225600.0,0.0,1.293101,241.66128,205.335718,50.209209,-132.9903,12.708438,220.805333,194.563834,26.969999,37.684545
0,2021-01-04,CAT,183.0,185.979996,180.25,178.422684,4078300.0,0.0,2.036097,179.122987,172.662123,59.272566,141.72756,34.923782,174.135918,167.630383,26.969999,37.684545


Stock Dimension: 29, State Space: 291


In [44]:
%%capture
## Setup Training environment
# Keyword arguments shared by the training and the trading environment.
env_kwargs = dict(
    hmax=100,
    initial_amount=1000000,
    buy_cost_pct=0.001,
    sell_cost_pct=0.001,
    state_space=state_space,
    stock_dim=stock_dimension,
    tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
    action_space=stock_dimension,
    reward_scaling=1e-4,
)

# Training environment on the train split.
e_train_gym = StockTradingEnv(df=train, **env_kwargs)
# Trading environment on the trade split, with no turbulence threshold.
e_trade_gym = StockTradingEnv(df=trade, turbulence_threshold=None, **env_kwargs)

# Wrap the training environment for Stable-Baselines3 and hand it to the agent.
env_train, _ = e_train_gym.get_sb_env()
agent = DRLAgent(env=env_train)

## Soft-Actor critic model
# SAC_PARAMS = {
#     "batch_size": 128,
#     "buffer_size": 1000000,
#     "learning_rate": 0.0001,
#     "learning_starts": 100,
#     "ent_coef": "auto_0.1",
# }

# model_sac = agent.get_model("sac",model_kwargs = SAC_PARAMS)

# trained_sac = agent.train_model(model=model_sac, 
#                              tb_log_name='sac',
#                              total_timesteps=60000)


## DDPG model
# Ask the agent for a DDPG model; no model_kwargs are passed at this point.
model_ddpg = agent.get_model("ddpg")



In [40]:
%%time
%%capture
# Train the DDPG model built above for 50000 timesteps, logging to TensorBoard as 'ddpg'.
trained_ddpg = agent.train_model(
    model=model_ddpg,
    tb_log_name='ddpg',
    total_timesteps=50000,
)

CPU times: user 2h 43min 51s, sys: 5.62 s, total: 2h 43min 56s
Wall time: 6min 4s


## Optuna baseline

See the [full list](https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/utils/hyperparams_opt.py) of tunable hyperparameters for each algorithm.

In [None]:
from IPython.display import clear_output

def sample_ddpg_params(trial:optuna.Trial):
  """Sample one set of DDPG hyperparameters from an Optuna trial.

  Args:
    trial: the Optuna trial that proposes the values.

  Returns:
    dict with the keys ``buffer_size`` (one of 1e4, 1e5, 1e6 as int),
    ``learning_rate`` (float drawn on a log scale from [1e-5, 1]) and
    ``batch_size`` (one of 32, 64, 128, 256, 512).
  """
  # Size of the replay buffer
  buffer_size = trial.suggest_categorical("buffer_size", [int(1e4), int(1e5), int(1e6)])
  # suggest_float(..., log=True) is the supported replacement for the
  # deprecated suggest_loguniform and samples from the same log-uniform range.
  learning_rate = trial.suggest_float("learning_rate", 1e-5, 1, log=True)
  batch_size = trial.suggest_categorical("batch_size", [32, 64, 128, 256, 512])

  return {"buffer_size": buffer_size,
          "learning_rate":learning_rate,
          "batch_size":batch_size}

# Objective for tuning : Sharpe ratio
def calculate_sharpe(df):
  """Annualised Sharpe ratio (252 periods, no risk-free rate) of an account-value series.

  Args:
    df: DataFrame with an ``account_value`` column. A ``daily_return`` column
      (period-over-period percentage change) is written to it in place, as
      the original implementation did.

  Returns:
    ``sqrt(252) * mean(daily_return) / std(daily_return)``, or 0 when the
    standard deviation is zero or undefined.
  """
  df['daily_return'] = df['account_value'].pct_change(1)
  volatility = df['daily_return'].std()
  # std() is NaN when fewer than two returns exist. NaN != 0 evaluates to True,
  # so a plain zero check would let a NaN objective value through.
  if pd.isna(volatility) or volatility == 0:
    return 0
  return (252**0.5)*df['daily_return'].mean()/volatility
  
  
def objective(trial:optuna.Trial):
  """Optuna objective: train DDPG with sampled hyperparameters and score it by Sharpe ratio.

  Relies on the notebook-level ``agent`` and ``e_trade_gym`` objects. Each call
  trains for 50000 timesteps, saves the trained model under
  ``trained_models/optuna/`` keyed by the trial number, runs a prediction pass
  on the trading environment and returns ``calculate_sharpe`` of the resulting
  account-value frame.
  """
  # Hyperparameters proposed by this trial.
  sampled_kwargs = sample_ddpg_params(trial)

  candidate = agent.get_model("ddpg",model_kwargs = sampled_kwargs, verbose=0)
  fitted = agent.train_model(
    model=candidate,
    tb_log_name="ddpg_optuna",
    total_timesteps=50000,
  )

  # Persist the trained model so the best trial can be reloaded afterwards.
  checkpoint_path = 'trained_models/optuna/ddpg_{}.pth'.format(trial.number)
  fitted.save(checkpoint_path)
  clear_output(wait=True)

  # Account value over the trading period for these hyperparameters.
  account_history, _ = DRLAgent.DRL_prediction(
    model=fitted,
    environment=e_trade_gym,
  )

  return calculate_sharpe(account_history)

# Create the study. The direction is 'maximize' because the objective returns
# a Sharpe ratio and higher is better.
# Author's note: use a pruner, otherwise errors related to model divergence may occur.
# NOTE(review): objective() does not report intermediate values, so the pruner
# may have nothing to act on - confirm.
# A multivariate sampler is an alternative:
# sampler = optuna.samplers.TPESampler(multivariate=True, seed=42)
sampler = optuna.samplers.TPESampler(seed=42)
study = optuna.create_study(
    direction='maximize',
    study_name="ddpg_study",
    pruner=optuna.pruners.HyperbandPruner(),
    sampler=sampler,
)

# Increase n_trials to scan the search space more thoroughly.
study.optimize(objective, catch=(ValueError,), n_trials=30)

[32m[I 2021-11-23 18:15:44,809][0m Trial 15 finished with value: 1.2639755288494556 and parameters: {'buffer_size': 100000, 'learning_rate': 0.7252659712570544, 'batch_size': 32}. Best is trial 3 with value: 2.1930470176329484.[0m


hit end!
{'buffer_size': 1000000, 'learning_rate': 2.578760640231829e-05, 'batch_size': 256}
day: 251, episode: 3210
begin_total_asset: 1000000.00
end_total_asset: 1152198.78
total_reward: 152198.78
total_cost: 1068.72
total_trades: 3470
Sharpe: 0.577
day: 251, episode: 3220
begin_total_asset: 1000000.00
end_total_asset: 1152198.78
total_reward: 152198.78
total_cost: 1068.72
total_trades: 3485
Sharpe: 0.577
day: 251, episode: 3230
begin_total_asset: 1000000.00
end_total_asset: 1152198.78
total_reward: 152198.78
total_cost: 1068.72
total_trades: 3489
Sharpe: 0.577
day: 251, episode: 3240
begin_total_asset: 1000000.00
end_total_asset: 1148363.25
total_reward: 148363.25
total_cost: 1225.92
total_trades: 3525
Sharpe: 0.577
day: 251, episode: 3250
begin_total_asset: 1000000.00
end_total_asset: 1176608.08
total_reward: 176608.08
total_cost: 1619.44
total_trades: 3026
Sharpe: 0.647
day: 251, episode: 3260
begin_total_asset: 1000000.00
end_total_asset: 1130053.23
total_reward: 130053.23
total_

In [None]:
# Get the best hyperparameters and compare them with the config defaults.
print('Hyperparameters after tuning',study.best_params)
print('Hyperparameters before tuning',config.DDPG_PARAMS)
study.best_trial
from stable_baselines3 import DDPG
# Load the best trial's model from the path objective() saved it to
# ('trained_models/optuna/ddpg_<trial number>.pth'); the previous
# 'models/...' path did not match any file written by this notebook.
tuned_model_ddpg = DDPG.load('trained_models/optuna/ddpg_{}.pth'.format(study.best_trial.number),env=env_train)
# Trading-period account value with the tuned model
df_account_value_tuned, df_actions_tuned = DRLAgent.DRL_prediction(
    model=tuned_model_ddpg, 
    environment = e_trade_gym)

perf_stats_all_tuned = backtest_stats(account_value=df_account_value_tuned)
perf_stats_all_tuned = pd.DataFrame(perf_stats_all_tuned)
# These statistics come from the tuned model, so label them accordingly.
print(f"Tuned model stats: {perf_stats_all_tuned}")

## XCS229ii Algorithm