# INTRODUCTION
1. In this tutorial, we will tune hyperparameters for Stable-Baselines3 models using Optuna.
2. The default model hyperparameters may not be adequate for your custom portfolio or custom state space. Reinforcement learning algorithms are sensitive to hyperparameters, so tuning is an important step.
3. Hyperparameters are tuned against an objective, which is either maximized or minimized. Here we tune our hyperparameters to maximize the Sharpe ratio.

In [None]:
#Installing FinRL
%%capture
!pip install git+https://github.com/AI4Finance-LLC/FinRL-Library.git

In [None]:
#Installing Optuna
%%capture
!pip3 install optuna

In [None]:
#Importing the libraries
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
# matplotlib.use('Agg')
import datetime
import optuna
%matplotlib inline
from finrl.apps import config
from optuna.integration import PyTorchLightningPruningCallback
from finrl.neo_finrl.preprocessor.yahoodownloader import YahooDownloader
from finrl.neo_finrl.preprocessor.preprocessors import FeatureEngineer, data_split
from finrl.neo_finrl.env_stock_trading.env_stocktrading import StockTradingEnv
from finrl.neo_finrl.env_stock_trading.env_stocktrading_np import StockTradingEnv as StockTradingEnv_numpy
from finrl.drl_agents.stablebaselines3.models import DRLAgent
from finrl.drl_agents.rllib.models import DRLAgent as DRLAgent_rllib
from finrl.neo_finrl.data_processor import DataProcessor
import joblib
from finrl.plot import backtest_stats, backtest_plot, get_daily_return, get_baseline
import ray
from pprint import pprint

import sys
sys.path.append("../FinRL-Library")

import itertools


Module "zipline.assets" not found; multipliers will not be applied to position notionals.



In [None]:
import os

# Create every working directory the notebook writes to.
# exist_ok=True makes the cell safe to re-run and removes the gap between
# checking for a directory and creating it.
for directory in (
    config.DATA_SAVE_DIR,
    config.TRAINED_MODEL_DIR,
    config.TENSORBOARD_LOG_DIR,
    config.RESULTS_DIR,
):
    os.makedirs("./" + directory, exist_ok=True)

## COLLECTING DATA AND PREPROCESSING

In [None]:
# Download the price history for a custom ticker list through YahooDownloader
ticker_list = ['TSLA']
df = YahooDownloader(
    start_date='2014-01-01',
    end_date='2021-10-01',
    ticker_list=ticker_list,
).fetch_data()

[*********************100%***********************]  1 of 1 completed
Shape of DataFrame:  (1951, 8)


In [None]:
# Add technical indicators, VIX and the turbulence index to the dataframe.
# Each feature family is switched on through its use_* flag.
fe = FeatureEngineer(
    use_technical_indicator=True,
    tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
    use_vix=True,
    use_turbulence=True,
    user_defined_feature=False,
)

processed = fe.preprocess_data(df)

Successfully added technical indicators
[*********************100%***********************]  1 of 1 completed
Shape of DataFrame:  (1950, 8)
Successfully added vix
Successfully added turbulence index


In [None]:
# Build the full (date, ticker) grid over the calendar range of the data,
# left-join the processed features onto it, keep only the dates that actually
# occur in `processed`, and fill whatever is still missing with 0.
list_ticker = processed["tic"].unique().tolist()
list_date = list(
    pd.date_range(processed["date"].min(), processed["date"].max()).astype(str)
)
combination = list(itertools.product(list_date, list_ticker))

processed_full = pd.DataFrame(combination, columns=["date", "tic"]).merge(
    processed, on=["date", "tic"], how="left"
)
processed_full = (
    processed_full[processed_full["date"].isin(processed["date"])]
    .sort_values(["date", "tic"])
    .fillna(0)
)

# Preview the first rows
processed_full.sort_values(["date", "tic"], ignore_index=True).head(5)

Unnamed: 0,date,tic,open,high,low,close,volume,day,macd,rsi_30,cci_30,dx_30,vix,turbulence
0,2014-01-02,TSLA,29.959999,30.496,29.309999,30.02,30942000.0,3.0,0.0,0.0,66.666667,100.0,14.23,0.0
1,2014-01-03,TSLA,30.0,30.438,29.719999,29.912001,23475000.0,4.0,-0.002423,0.0,66.666667,100.0,13.76,0.0
2,2014-01-06,TSLA,30.0,30.08,29.048,29.4,26805500.0,0.0,-0.018965,0.0,-100.0,100.0,13.55,0.0
3,2014-01-07,TSLA,29.524,30.08,29.049999,29.872,25170500.0,1.0,-0.008982,44.200799,-39.954985,100.0,12.92,0.0
4,2014-01-08,TSLA,29.77,30.74,29.752001,30.256001,30816000.0,2.0,0.011912,59.330039,106.65291,5.761888,12.87,0.0


In [None]:
# Chronological train/trade split. The trade window starts where the training
# window ends, so the backtest never replays days the agent was trained on
# (the previous '2020-05-01' start overlapped the training window by two months).
# NOTE(review): assumes data_split treats the end date as exclusive - confirm,
# otherwise 2020-07-01 itself lands in both sets.
train = data_split(processed_full, '2014-01-01', '2020-07-01')
trade = data_split(processed_full, '2020-07-01', '2021-10-01')
print(len(train))
print(len(trade))

1635
357


In [None]:
# Size of the observation vector: one scalar slot, plus two slots and one slot
# per technical indicator for every ticker.
stock_dimension = len(train.tic.unique())
state_space = 1 + stock_dimension * (2 + len(config.TECHNICAL_INDICATORS_LIST))
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

Stock Dimension: 1, State Space: 7


In [None]:
# Keyword arguments shared by the training and the trading environments
env_kwargs = {
    "hmax": 100,
    "initial_amount": 1000000,
    "buy_cost_pct": 0.001,
    "sell_cost_pct": 0.001,
    "state_space": state_space,
    "stock_dim": stock_dimension,
    "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
    "action_space": stock_dimension,
    "reward_scaling": 1e-4,
}

# Instantiate the gym-compatible training environment
e_train_gym = StockTradingEnv(df=train, **env_kwargs)

In [None]:
# Get the Stable-Baselines3 environment wrapper for training
# and instantiate our training agent on top of it
env_train, _ = e_train_gym.get_sb_env()
print(type(env_train))
agent = DRLAgent(env=env_train)

<class 'stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv'>


In [None]:
# Instantiate the trading environment (same kwargs as training, no turbulence threshold)
e_trade_gym = StockTradingEnv(
    df=trade,
    turbulence_threshold=None,
    **env_kwargs,
)

## TUNING HYPERPARAMETERS USING OPTUNA
1. Go to this [link](https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/utils/hyperparams_opt.py); you will find all the hyperparameters that can be tuned for each model.
2. For your model, pick the hyperparameters you want to optimize and return them as a dictionary.
3. Optuna has a hyperparameter-importance feature that you can use to identify which hyperparameters matter most for tuning.
4. By default, Optuna uses [TPESampler](https://www.youtube.com/watch?v=tdwgR1AqQ8Y) to sample hyperparameters from the search space.

In [None]:
def sample_ddpg_params(trial: "optuna.Trial"):
    """Sample one set of DDPG hyperparameters from the search space.

    Args:
        trial: Optuna trial used to draw the suggestions.

    Returns:
        dict with the keys ``buffer_size``, ``learning_rate`` and
        ``batch_size``.
    """
    # Size of the replay buffer
    buffer_size = trial.suggest_categorical(
        "buffer_size", [int(1e4), int(1e5), int(1e6)]
    )
    # Sampled on a log scale; suggest_float(..., log=True) replaces the
    # deprecated suggest_loguniform with the same bounds.
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 1, log=True)
    batch_size = trial.suggest_categorical("batch_size", [32, 64, 128, 256, 512])

    return {
        "buffer_size": buffer_size,
        "learning_rate": learning_rate,
        "batch_size": batch_size,
    }

In [None]:
#Calculate the Sharpe ratio
#This is our objective for tuning
def calculate_sharpe(df):
    """Return the Sharpe ratio of an account-value series, scaled by sqrt(252).

    Args:
        df: DataFrame with an ``account_value`` column. A ``daily_return``
            column (one-step percentage change of ``account_value``) is
            added to it in place.

    Returns:
        ``sqrt(252) * mean(daily_return) / std(daily_return)``, or 0.0 when
        the standard deviation is zero or undefined (fewer than two returns).
    """
    # Kept as a column on the caller's frame, as the function always did.
    df['daily_return'] = df['account_value'].pct_change(1)
    daily_return = df['daily_return']
    volatility = daily_return.std()

    # A NaN std (too few rows) would otherwise pass a plain "!= 0" check and
    # hand NaN back as the objective value.
    if pd.isna(volatility) or volatility == 0:
        return 0.0

    return (252 ** 0.5) * daily_return.mean() / volatility

## CALLBACKS
1. The callback stops the study when the improvement in the best objective value falls below a given threshold.
2. It only starts checking after a minimum number of trials (`trial_number`) has run, never before.
3. It waits until the threshold condition has been met more than `patience` times before stopping.

In [None]:
class LoggingCallback:
    """Optuna callback that stops a study once its best value stops improving.

    After every trial the study's current best value is compared with the best
    value recorded after the previous trial. Once more than ``trial_number``
    trials have run, each trial whose change in best value is below
    ``threshold`` is recorded in ``cb_list``; when more than ``patience`` such
    trials have been recorded, ``study.stop()`` is called. The count is
    cumulative: it is not reset when the best value improves.
    """

    def __init__(self, threshold, trial_number, patience):
        '''
        threshold: float, smallest change in the best value that counts as an improvement
        trial_number: int, trial number after which stagnation starts being checked
        patience: int, number of stagnant trials tolerated before the study is stopped
        '''
        self.threshold = threshold
        self.trial_number = trial_number
        self.patience = patience
        self.cb_list = []  # Trial numbers for which the threshold condition was met

    def __call__(self, study: "optuna.study.Study", frozen_trial: "optuna.trial.FrozenTrial"):
        try:
            current_best = study.best_value
        except ValueError:
            # Presumably raised by Optuna when no trial has completed yet
            # (e.g. the first trials failed); nothing to compare against.
            return

        # Read the previous best BEFORE overwriting it. Storing the new value
        # first made both values always identical, so every trial counted as
        # "no improvement".
        previous_best_value = study.user_attrs.get("previous_best_value", None)
        study.set_user_attr("previous_best_value", current_best)

        # First recorded value: there is no earlier best to compare with
        if previous_best_value is None:
            return
        # The minimum number of trials has not passed yet
        if frozen_trial.number <= self.trial_number:
            return
        # Previous and current best have opposite signs: treated as a change
        if previous_best_value * current_best < 0:
            return
        # The best value moved by at least the threshold
        if abs(previous_best_value - current_best) >= self.threshold:
            return

        self.cb_list.append(frozen_trial.number)
        # Stop once the threshold condition has been met more than `patience` times
        if len(self.cb_list) > self.patience:
            print('The study stops now...')
            print('With number',frozen_trial.number ,'and value ',frozen_trial.value)
            print('The previous and current best values are {} and {} respectively'
                    .format(previous_best_value, current_best))
            study.stop()

In [None]:
from IPython.display import clear_output
import sys         
def objective(trial:optuna.Trial):
    """Train a DDPG model with sampled hyperparameters and score it.

    The trial suggests one hyperparameter set, a model is trained with it for
    10000 timesteps, and the Sharpe ratio of the resulting account value on
    the trading environment is returned as the value to maximize.

    NOTE(review): the score is computed on e_trade_gym, the same environment
    used later for the final backtest, so the reported backtest of the tuned
    model is likely optimistic.
    """
    # Build the model from the hyperparameters suggested for this trial
    model_ddpg = agent.get_model(
        "ddpg",
        model_kwargs=sample_ddpg_params(trial),
    )
    # total_timesteps can be increased for a better comparison between trials
    trained_ddpg = agent.train_model(
        model=model_ddpg,
        tb_log_name="ddpg",
        total_timesteps=10000,
    )
    clear_output(wait=True)

    # Account value over the trading period for this hyperparameter set
    df_account_value, _ = DRLAgent.DRL_prediction(
        model=trained_ddpg,
        environment=e_trade_gym,
    )
    return calculate_sharpe(df_account_value)

# Create a study with direction 'maximize', since we want to maximize the Sharpe ratio.
# A pruner is meant to stop unpromising trials early.
# NOTE(review): objective() never calls trial.report(), so the pruner presumably
# has no intermediate values to act on - confirm.
# You can also use the multivariate sampler:
# sampler = optuna.samplers.TPESampler(multivariate=True, seed=42)
sampler = optuna.samplers.TPESampler(seed=42)
study = optuna.create_study(
    study_name="ddpg_study",
    direction='maximize',
    sampler=sampler,
    pruner=optuna.pruners.HyperbandPruner(),
)

logging_callback = LoggingCallback(threshold=1e-5, patience=20, trial_number=5)
# n_trials can be increased to scan the search space more thoroughly;
# trials that raise ValueError are recorded as failed instead of aborting the study
study.optimize(
    objective,
    n_trials=100,
    catch=(ValueError,),
    callbacks=[logging_callback],
)

[32m[I 2021-10-02 06:23:51,956][0m Trial 26 finished with value: 0.0 and parameters: {'buffer_size': 100000, 'learning_rate': 0.06918729068589365, 'batch_size': 32}. Best is trial 3 with value: 1.9001030566141062.[0m


hit end!
The study stops now...
With number 26 and value  0.0
--- 1.9001030566141062 1.9001030566141062


In [None]:
# Display every trial recorded by the study
study.trials

[FrozenTrial(number=0, values=[0.0], datetime_start=datetime.datetime(2021, 10, 2, 5, 17, 2, 606341), datetime_complete=datetime.datetime(2021, 10, 2, 5, 19, 32, 966401), params={'buffer_size': 100000, 'learning_rate': 0.009846738873614562, 'batch_size': 256}, distributions={'buffer_size': CategoricalDistribution(choices=(10000, 100000, 1000000)), 'learning_rate': LogUniformDistribution(high=1.0, low=1e-05), 'batch_size': CategoricalDistribution(choices=(32, 64, 128, 256, 512))}, user_attrs={}, system_attrs={}, intermediate_values={}, trial_id=0, state=TrialState.COMPLETE, value=None),
 FrozenTrial(number=1, values=[0.0], datetime_start=datetime.datetime(2021, 10, 2, 5, 19, 32, 968633), datetime_complete=datetime.datetime(2021, 10, 2, 5, 22, 5, 653662), params={'buffer_size': 1000000, 'learning_rate': 0.1452824663751603, 'batch_size': 512}, distributions={'buffer_size': CategoricalDistribution(choices=(10000, 100000, 1000000)), 'learning_rate': LogUniformDistribution(high=1.0, low=1e-0

In [None]:
# study.trials returns all the trials
# Each trial's value is the Sharpe ratio obtained with its hyperparameters
# The trial with the highest value is the best one (see study.best_trial)
# study = joblib.load("/content/final_ddpg_study.pkl")
# study.trials

In [None]:
# Save the study to disk so it can be reloaded later with joblib.load
joblib.dump(study, "final_ddpg_study__.pkl")

['final_ddpg_study__.pkl']

In [None]:
# Compare the best hyperparameters found by the study with the defaults in config
print('Hyperparameters after tuning', study.best_params)
print('Hyperparameters before tuning', config.DDPG_PARAMS)

Hyperparameters after tuning {'buffer_size': 100000, 'learning_rate': 0.010907475835157693, 'batch_size': 256}
Hyperparameters before tuning {'batch_size': 128, 'buffer_size': 50000, 'learning_rate': 0.001}


In [None]:
# Display the trial that achieved the best objective value
study.best_trial

FrozenTrial(number=3, values=[1.9001030566141062], datetime_start=datetime.datetime(2021, 10, 2, 5, 24, 35, 464612), datetime_complete=datetime.datetime(2021, 10, 2, 5, 27, 5, 717048), params={'buffer_size': 100000, 'learning_rate': 0.010907475835157693, 'batch_size': 256}, distributions={'buffer_size': CategoricalDistribution(choices=(10000, 100000, 1000000)), 'learning_rate': LogUniformDistribution(high=1.0, low=1e-05), 'batch_size': CategoricalDistribution(choices=(32, 64, 128, 256, 512))}, user_attrs={}, system_attrs={}, intermediate_values={}, trial_id=3, state=TrialState.COMPLETE, value=None)

In [None]:
# Train a model built from the best hyperparameters for 30000 steps
tuned_model_ddpg_ = agent.get_model("ddpg", model_kwargs=study.best_params)
trained_ddpg = agent.train_model(
    model=tuned_model_ddpg_,
    tb_log_name='ddpg',
    total_timesteps=30000,
)

{'buffer_size': 100000, 'learning_rate': 0.010907475835157693, 'batch_size': 256}
Using cuda device
Logging to tensorboard_log/ddpg/ddpg_33
---------------------------------
| time/              |          |
|    episodes        | 4        |
|    fps             | 98       |
|    time_elapsed    | 66       |
|    total timesteps | 6540     |
| train/             |          |
|    actor_loss      | 0.335    |
|    critic_loss     | 33.5     |
|    learning_rate   | 0.0109   |
|    n_updates       | 4905     |
---------------------------------
---------------------------------
| time/              |          |
|    episodes        | 8        |
|    fps             | 85       |
|    time_elapsed    | 152      |
|    total timesteps | 13080    |
| train/             |          |
|    actor_loss      | -0.00693 |
|    critic_loss     | 34.9     |
|    learning_rate   | 0.0109   |
|    n_updates       | 11445    |
---------------------------------
day: 1634, episode: 300
begin_total_asset: 1

In [None]:
# Account value and actions over the trading period with the tuned model
df_account_value_tuned, df_actions_tuned = DRLAgent.DRL_prediction(
    model=tuned_model_ddpg_,
    environment=e_trade_gym,
)

hit end!


In [None]:
# Backtest the tuned model and save its performance statistics to the results directory
print("==============Get Backtest Results===========")
now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')

perf_stats_all_tuned = pd.DataFrame(
    backtest_stats(account_value=df_account_value_tuned)
)
perf_stats_all_tuned.to_csv(
    "./"+config.RESULTS_DIR+"/perf_stats_all_tuned_"+now+'.csv'
)

Annual return          1.661870
Cumulative returns     3.002653
Annual volatility      0.617119
Sharpe ratio           1.900103
Calmar ratio           4.584901
Stability              0.709180
Max drawdown          -0.362466
Omega ratio            1.414683
Sortino ratio          3.080305
Skew                        NaN
Kurtosis                    NaN
Tail ratio             1.294293
Daily value at risk   -0.073097
dtype: float64


In [None]:
# Now train with the untuned hyperparameters
# (the defaults in config.DDPG_PARAMS) for the same number of steps
non_tuned_model_ddpg_ = agent.get_model("ddpg", model_kwargs=config.DDPG_PARAMS)
trained_ddpg = agent.train_model(
    model=non_tuned_model_ddpg_,
    tb_log_name='ddpg',
    total_timesteps=30000,
)

{'batch_size': 128, 'buffer_size': 50000, 'learning_rate': 0.001}
Using cuda device
Logging to tensorboard_log/ddpg/ddpg_34
---------------------------------
| time/              |          |
|    episodes        | 4        |
|    fps             | 96       |
|    time_elapsed    | 67       |
|    total timesteps | 6540     |
| train/             |          |
|    actor_loss      | 4.03e+03 |
|    critic_loss     | 7.65e+05 |
|    learning_rate   | 0.001    |
|    n_updates       | 4905     |
---------------------------------
---------------------------------
| time/              |          |
|    episodes        | 8        |
|    fps             | 84       |
|    time_elapsed    | 154      |
|    total timesteps | 13080    |
| train/             |          |
|    actor_loss      | 2.98e+03 |
|    critic_loss     | 3.27e+04 |
|    learning_rate   | 0.001    |
|    n_updates       | 11445    |
---------------------------------
day: 1634, episode: 320
begin_total_asset: 1000000.00
end_to

In [None]:
# Account value and actions over the trading period with the untuned model
df_account_value, df_actions = DRLAgent.DRL_prediction(
    model=non_tuned_model_ddpg_,
    environment=e_trade_gym,
)

hit end!


In [None]:
# Backtest the model trained with the untuned hyperparameters
print("==============Get Backtest Results===========")
now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')

perf_stats_all = pd.DataFrame(backtest_stats(account_value=df_account_value))
# perf_stats_all.to_csv("./"+config.RESULTS_DIR+"/perf_stats_all_"+now+'.csv')

Annual return          0.0
Cumulative returns     0.0
Annual volatility      0.0
Sharpe ratio           NaN
Calmar ratio           NaN
Stability              0.0
Max drawdown           0.0
Omega ratio            NaN
Sortino ratio          NaN
Skew                   NaN
Kurtosis               NaN
Tail ratio             NaN
Daily value at risk    0.0
dtype: float64



invalid value encountered in true_divide


invalid value encountered in true_divide


invalid value encountered in double_scalars



In [None]:
# Plot how the objective value (our Sharpe ratio) evolves over the trials
# You can run more trials for further optimization
from optuna.visualization import plot_optimization_history
plot_optimization_history(study)

In [None]:
from optuna.visualization import plot_contour
from optuna.visualization import plot_edf
from optuna.visualization import plot_intermediate_values
from optuna.visualization import plot_optimization_history
from optuna.visualization import plot_parallel_coordinate
from optuna.visualization import plot_param_importances
from optuna.visualization import plot_slice

In [None]:
# Hyperparameter importance
# Shows how much each tuned hyperparameter (buffer_size, learning_rate,
# batch_size) contributed to the objective value
plot_param_importances(study)

## FURTHER WORKS

1.   You can tune more critical hyperparameters
2.   Multi-objective hyperparameter optimization using Optuna. Here we can maximize Sharpe and simultaneously minimize Volatility in our account value to tune our hyperparameters

