# Introduction
1. In this tutorial, we will tune hyperparameters for Stable Baselines3 models using Optuna.
2. The default model hyperparameters may not be adequate for your custom portfolio or custom state space. Reinforcement learning algorithms are sensitive to hyperparameters, so tuning is an important step.
3. Hyperparameters are tuned against an objective, which is either maximized or minimized. Here we tune our hyperparameters to maximize the Sharpe ratio.

In [None]:
#Installing Optuna
# %%capture
# !pip3 install optuna
# !pip install optuna-integration

In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import datetime
import optuna

import sys
sys.path.append("/home/ubuntu/algorithmic_trading/CNN-DRL")
sys.path.append("/home/ubuntu/algorithmic_trading/CNN-DRL/FinRL-Meta")

from meta import config, config_tickers
from optuna.integration import PyTorchLightningPruningCallback
from meta.data_processor import DataProcessor
from stable_baselines3.common.vec_env import DummyVecEnv
from agents2.agent import DRLAgent
from envs.StockTradingEnvCNN import StockTradingEnvCNN
from envs.StockTradingEnvMLP import StockTradingEnvMLP
from policies.CnnPolicy import CustomCNN
from plot2 import get_baseline
import joblib



## Hyperparameters

In [2]:
# Date ranges, ticker universe and indicator names, all read from the project config modules.
TRAIN_START_DATE, TRAIN_END_DATE = config.TRAIN_START_DATE, config.TRAIN_END_DATE
TEST_START_DATE, TEST_END_DATE = config.TEST_START_DATE, config.TEST_END_DATE
DOW_30_TICKER, INDICATORS = config_tickers.DOW_30_TICKER, config.INDICATORS

In [3]:
# Keyword arguments handed to the policy: CustomCNN is the features extractor
# and is itself constructed with features_dim=30.
policy_kwargs = {
    "features_extractor_class": CustomCNN,
    "features_extractor_kwargs": {"features_dim": 30},
}

## Collecting data and preprocessing

In [4]:
def instantiate_env(data_source, start_date, end_date, time_interval,
                    ticker_list, technical_indicator_list, env,
                    if_vix=True, cache=False, select_stockstats_talib=0,
                    hmax=100, initial_amount=1000000, reward_scaling=1e-4,
                    transaction_cost_pct=0.001, if_train=True, **kwargs):
    """Fetch and preprocess market data, then build a trading environment.

    Args:
        data_source, start_date, end_date, time_interval: forwarded to
            ``DataProcessor``.
        ticker_list, technical_indicator_list, if_vix, cache,
            select_stockstats_talib: forwarded to ``DataProcessor.run``.
        env: environment class (or any callable) invoked as
            ``env(df=..., **env_kwargs)``.
        hmax, initial_amount, reward_scaling: passed to ``env`` unchanged.
        transaction_cost_pct: cost applied to every stock, for both the buy
            and the sell cost lists.
        if_train: when True the environment is also wrapped in a ``DRLAgent``;
            when False the environment is built with
            ``turbulence_threshold=None`` and no agent is created.
        **kwargs: accepted for call-site compatibility; not used.

    Returns:
        ``(environment, DRLAgent)`` when ``if_train`` is True,
        otherwise ``(environment, None)``.
    """
    # NOTE: this silences *all* warnings process-wide, not only those raised
    # while downloading data.
    import warnings
    warnings.filterwarnings("ignore")

    # fetch data
    DP = DataProcessor(
        data_source=data_source,
        start_date=start_date,
        end_date=end_date,
        time_interval=time_interval
    )
    # The returned arrays are not needed here; the call is made so that
    # DP.dataframe is available afterwards.
    DP.run(
        ticker_list=ticker_list,
        technical_indicator_list=technical_indicator_list,
        if_vix=if_vix,
        cache=cache,
        select_stockstats_talib=select_stockstats_talib
    )

    df = DP.dataframe.sort_values(['time', 'tic'], ignore_index=True)

    stock_dimension = len(df.tic.unique())
    # 1 + 2 entries per stock + one entry per (indicator, stock) pair
    state_space = (1 + 2 * stock_dimension
                   + len(technical_indicator_list) * stock_dimension)

    # Two independent lists: a chained assignment would make buy and sell
    # costs share one list object, so mutating one would change the other.
    buy_cost_list = [transaction_cost_pct] * stock_dimension
    sell_cost_list = [transaction_cost_pct] * stock_dimension
    num_stock_shares = [0] * stock_dimension

    env_kwargs = {
        "hmax": hmax,
        "initial_amount": initial_amount,
        "num_stock_shares": num_stock_shares,
        "buy_cost_pct": buy_cost_list,
        "sell_cost_pct": sell_cost_list,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": technical_indicator_list,
        "action_space": stock_dimension,
        "reward_scaling": reward_scaling
    }

    if if_train:
        e_train_gym = env(df=df, **env_kwargs)
        agent = DRLAgent(env=e_train_gym)
        return e_train_gym, agent

    e_trade_gym = env(df=df, turbulence_threshold=None, **env_kwargs)
    return e_trade_gym, None

In [5]:
# Training environment, together with the DRLAgent wrapped around it.
e_train_gym, agent = instantiate_env(start_date=TRAIN_START_DATE,
                                    end_date=TRAIN_END_DATE,
                                    ticker_list=DOW_30_TICKER,
                                    data_source='yahoofinance',
                                    time_interval='1D',
                                    technical_indicator_list=INDICATORS,
                                    env=StockTradingEnvCNN)

# Trading (evaluation) environment over the test period. if_train=False
# selects the trade branch of instantiate_env, so no second DRLAgent is
# created just to be discarded.
e_trade_gym, _ = instantiate_env(start_date=TEST_START_DATE,
                                end_date=TEST_END_DATE,
                                ticker_list=DOW_30_TICKER,
                                data_source='yahoofinance',
                                time_interval='1D',
                                technical_indicator_list=INDICATORS,
                                env=StockTradingEnvCNN,
                                if_train=False)

yahoofinance successfully connected


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%*******

            date       open       high        low      close  adjusted_close  \
0     2014-01-02  90.900002  91.080002  89.379997  89.449997       76.835388   
1     2014-01-03  89.150002  90.080002  88.629997  89.739998       77.084503   
2     2014-01-06  89.699997  90.250000  89.379997  89.699997       77.050117   
3     2014-01-07  90.169998  90.699997  89.010002  89.360001       76.758095   
4     2014-01-08  88.660004  89.639999  88.660004  89.410004       76.999229   
...          ...        ...        ...        ...        ...             ...   
48364 2020-07-24  42.820000  43.480000  42.299999  42.610001       35.357632   
48365 2020-07-27  42.810001  44.480000  42.430000  44.000000       36.511047   
48366 2020-07-28  43.740002  44.020000  42.590000  42.619999       35.365929   
48367 2020-07-29  42.799999  43.636002  42.740002  43.520000       36.112736   
48368 2020-07-30  42.660000  42.830002  41.340000  41.730000       34.627403   

        volume  tic  day  
0      51120

[*********************100%%**********************]  1 of 1 completed


           date       open       high        low      close  adjusted_close  \
0    2014-01-02  14.320000  14.590000  14.000000  14.230000       14.230000   
1    2014-01-03  14.060000  14.220000  13.570000  13.760000       13.760000   
2    2014-01-06  13.410000  14.000000  13.220000  13.550000       13.550000   
3    2014-01-07  12.380000  13.280000  12.160000  12.920000       12.920000   
4    2014-01-08  13.040000  13.240000  12.860000  12.870000       12.870000   
...         ...        ...        ...        ...        ...             ...   
1651 2020-07-24  27.959999  28.580000  25.530001  25.840000       25.840000   
1652 2020-07-27  26.600000  26.940001  24.549999  24.740000       24.740000   
1653 2020-07-28  24.860001  25.850000  24.049999  25.440001       25.440001   
1654 2020-07-29  25.160000  25.420000  23.730000  24.100000       24.100000   
1655 2020-07-30  25.040001  28.290001  24.639999  24.760000       24.760000   

      volume   tic  day  
0          0  ^VIX    3  

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%*******

           date       open       high        low      close  adjusted_close  \
0    2020-08-03  93.309998  94.400002  92.440002  93.540001       89.009560   
1    2020-08-04  93.489998  93.800003  92.489998  93.190002       88.676506   
2    2020-08-05  93.820000  95.580002  93.809998  95.389999       90.769951   
3    2020-08-06  95.180000  96.269997  94.480003  95.919998       91.274269   
4    2020-08-07  95.290001  99.300003  94.949997  99.160004       94.357353   
...         ...        ...        ...        ...        ...             ...   
8815 2021-09-24  57.200001  57.340000  56.740002  56.810001       50.040340   
8816 2021-09-27  56.880001  60.380001  56.880001  59.689999       52.577145   
8817 2021-09-28  60.150002  60.645000  59.139999  59.459999       52.374550   
8818 2021-09-29  59.459999  59.599998  58.770000  58.820000       51.810814   
8819 2021-09-30  58.919998  59.169998  57.549999  57.560001       50.700966   

        volume  tic  day  
0      2869000  AXP    0

[*********************100%%**********************]  1 of 1 completed

Succesfully add technical indicators
          date       open       high        low      close  adjusted_close  \
0   2020-08-03  25.750000  26.010000  22.170000  24.280001       24.280001   
1   2020-08-04  24.010000  24.760000  22.920000  23.760000       23.760000   
2   2020-08-05  23.440001  23.610001  22.860001  22.990000       22.990000   
3   2020-08-06  23.030001  24.110001  20.969999  22.650000       22.650000   
4   2020-08-07  23.450001  24.020000  22.020000  22.209999       22.209999   
..         ...        ...        ...        ...        ...             ...   
289 2021-09-24  19.330000  20.410000  17.629999  17.750000       17.750000   
290 2021-09-27  17.780001  19.320000  17.740000  18.760000       18.760000   
291 2021-09-28  19.740000  24.820000  19.709999  23.250000       23.250000   
292 2021-09-29  22.070000  23.790001  21.450001  22.559999       22.559999   
293 2021-09-30  21.480000  24.709999  20.600000  23.139999       23.139999   

     volume   tic  day  
0




Data clean for ^VIX is finished.
Data clean all finished!
Successfully transformed into array


## Tuning hyperparameters using Optuna
1. Go to this [link](https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/utils/hyperparams_opt.py); there you will find all the hyperparameters that can be tuned for each model.
2. For your model, pick the hyperparameters you want to optimize and return them as a dictionary.
3. Optuna has a feature called hyperparameter importance, which lets you identify the hyperparameters that matter most for tuning.
4. By default, Optuna uses [TPESampler](https://www.youtube.com/watch?v=tdwgR1AqQ8Y) to sample hyperparameters from the search space.

In [6]:
def sample_ddpg_params(trial:optuna.Trial):
    """Sample one set of DDPG hyperparameters from the search space.

    Args:
        trial: Optuna trial used to draw the suggestions.

    Returns:
        dict with the keys ``buffer_size``, ``learning_rate`` and
        ``batch_size``.
    """
    # Size of the replay buffer
    buffer_size = trial.suggest_categorical("buffer_size", [int(1e4), int(3e4), int(5e4)])
    # suggest_loguniform is deprecated; suggest_float(log=True) samples from
    # the same log-uniform range.
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-2, log=True)
    batch_size = trial.suggest_categorical("batch_size", [64, 128, 256])

    return {"buffer_size": buffer_size,
            "learning_rate": learning_rate,
            "batch_size": batch_size}

In [7]:
#Calculate the Sharpe ratio
#This is our objective for tuning
def calculate_sharpe(df):
    """Return the annualized Sharpe ratio of the ``account_value`` column.

    The day-over-day percentage change is stored in a new ``daily_return``
    column on ``df`` (the input frame is modified in place), and the ratio is
    ``sqrt(252) * mean(daily_return) / std(daily_return)``.

    Args:
        df: DataFrame with an ``account_value`` column.

    Returns:
        The Sharpe ratio, or ``0.0`` when the standard deviation of the
        returns is zero or undefined (fewer than two usable returns), so the
        tuning objective never receives NaN.
    """
    df['daily_return'] = df['account_value'].pct_change(1)
    volatility = df['daily_return'].std()
    # std() is NaN with fewer than two returns; NaN != 0 is True, so a plain
    # zero check would let NaN through as the objective value.
    if pd.isna(volatility) or volatility == 0:
        return 0.0
    return (252 ** 0.5) * df['daily_return'].mean() / volatility

## Callbacks
1. The callback stops the study when the improvement in the best objective value falls below a given threshold.
2. It only starts checking after a minimum number of trials (`trial_number`) has been reached, not before.
3. It waits until the threshold condition has been met more than `patience` times before stopping.

In [8]:
class LoggingCallback:
    """Optuna study callback that stops the study once the best value stalls.

    After every trial the study's current best value is compared with the
    best value recorded after the previous trial. Each trial (past the first
    ``trial_number`` trials) for which the two differ by less than
    ``threshold`` is recorded; once more than ``patience`` such trials have
    accumulated, the study is stopped.
    """

    def __init__(self,threshold,trial_number,patience):
        '''
        threshold: float tolerance for increase in sharpe ratio
        trial_number: int Prune after minimum number of trials
        patience: int patience for the threshold
        '''
        self.threshold = threshold
        self.trial_number  = trial_number
        self.patience = patience
        self.cb_list = [] #Trials list for which threshold is reached

    def __call__(self, study:optuna.study.Study, frozen_trial:optuna.trial.FrozenTrial):
        """Record stagnating trials and stop ``study`` when patience runs out."""
        try:
            current_best = study.best_value
        except ValueError:
            # No trial has completed yet (e.g. every trial so far failed),
            # so there is nothing to compare.
            return

        # Read the value stored after the previous trial BEFORE overwriting
        # it; otherwise previous and current are always identical and every
        # trial looks like "no improvement".
        previous_best = study.user_attrs.get("previous_best_value", None)
        study.set_user_attr("previous_best_value", current_best)

        #Checking if the minimum number of trials have passed
        if previous_best is None or frozen_trial.number <= self.trial_number:
            return
        #Checking if the previous and current objective values have the same sign
        if previous_best * current_best < 0:
            return
        #Checking for the threshold condition
        if abs(previous_best - current_best) >= self.threshold:
            return

        self.cb_list.append(frozen_trial.number)
        #If threshold is achieved for the patience amount of time
        if len(self.cb_list) > self.patience:
            print('The study stops now...')
            print('With number',frozen_trial.number ,'and value ',frozen_trial.value)
            print('The previous and current best values are {} and {} respectively'
                  .format(previous_best, current_best))
            study.stop()

In [9]:
from IPython.display import clear_output
import sys
import os

# Directory that objective() saves each trial's model into; created up front, no error if it already exists.
os.makedirs("models",exist_ok=True)

def objective(trial:optuna.Trial, model_name, policy, sample_model_params, policy_kwargs, total_timesteps):
    """Train one model with sampled hyperparameters and score it by Sharpe ratio.

    Args:
        trial: Optuna trial handed to ``sample_model_params``.
        model_name: name passed to ``agent.get_model``; also used as the
            tensorboard log name and in the checkpoint file name.
        policy: policy argument forwarded to ``agent.get_model``.
        sample_model_params: callable mapping ``trial`` to a dict of model kwargs.
        policy_kwargs: forwarded to ``agent.get_model``.
        total_timesteps: training budget forwarded to ``agent.train_model``.

    Returns:
        The value of ``calculate_sharpe`` on the account values obtained by
        running the trained model on the module-level ``e_trade_gym``.
    """
    # Draw this trial's hyperparameters and build the model from them.
    sampled_kwargs = sample_model_params(trial)
    candidate = agent.get_model(
        model_name,
        policy=policy,
        model_kwargs=sampled_kwargs,
        policy_kwargs=policy_kwargs,
    )

    # Train, then checkpoint under the trial number so the best trial can be reloaded later.
    fitted = agent.train_model(
        model=candidate,
        tb_log_name=model_name,
        total_timesteps=total_timesteps,
    )
    fitted.save(f'models/{model_name}_{trial.number}.pth')
    clear_output(wait=True)

    # Evaluate on the trading environment; only the account values are scored.
    account_values, _actions = DRLAgent.DRL_prediction(
        model=fitted,
        environment=e_trade_gym,
    )
    return calculate_sharpe(account_values)

In [12]:
#Create a study object and specify the direction as 'maximize'
#As you want to maximize sharpe
#Pruner stops not promising iterations
#Use a pruner, else you will get error related to divergence of model
#You can also use a multivariate sampler
#sampler = optuna.samplers.TPESampler(multivariate=True,seed=42)
# Sampler using TPE (Tree-structured Parzen Estimator) algorithm
def run_paramtune(**kwargs):
    model_name = kwargs.get('model_name')
    policy = kwargs.get('policy', 'MlpPolicy')
    sample_model_params = kwargs.get('sample_model_params')
    policy_kwargs = kwargs.get('policy_kwargs', None)
    n_trials = kwargs.get('n_trials', 30)
    total_timesteps = kwargs.get('total_timesteps', 10000)
    
    sampler = optuna.samplers.TPESampler(seed=42)
    study = optuna.create_study(study_name=f"{model_name}_study",direction='maximize',
                                sampler = sampler, pruner=optuna.pruners.HyperbandPruner())
    
    logging_callback = LoggingCallback(threshold=1e-5,patience=30,trial_number=5)
    #You can increase the n_trials for a better search space scanning
    study.optimize(lambda trial: objective(trial, model_name, policy, sample_model_params, policy_kwargs, total_timesteps),
                   n_trials=n_trials, catch=(ValueError,),callbacks=[logging_callback])

In [14]:
run_paramtune(model_name="ddpg",
              policy="CnnPolicy",
              sample_model_params=sample_ddpg_params,
              policy_kwargs=policy_kwargs,
              n_trials=10,
              total_timesteps=20000)

[I 2024-04-08 10:36:39,709] A new study created in memory with name: ddpg_study


{'buffer_size': 30000, 'learning_rate': 0.0006251373574521745, 'batch_size': 64}
Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to tensorboard_log/ddpg/ddpg_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 256      |
|    ep_rew_mean     | 8.63     |
| time/              |          |
|    episodes        | 4        |
|    fps             | 47       |
|    time_elapsed    | 21       |
|    total_timesteps | 1024     |
| train/             |          |
|    actor_loss      | -7.38    |
|    critic_loss     | 3.68     |
|    learning_rate   | 0.000625 |
|    n_updates       | 923      |
|    reward          | 0.0      |
---------------------------------
day: 345, episode: 30
begin_total_asset: 1000000.00
end_total_asset: 1176765.43
total_reward: 176765.43
total_cost: 132.41
total_trades: 3324
Sharpe: 0.655
---------------------------------
| rollout/           |          |
|    ep_len_mean 

[W 2024-04-08 10:41:35,671] Trial 0 failed with parameters: {'buffer_size': 30000, 'learning_rate': 0.0006251373574521745, 'batch_size': 64} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/py39/lib/python3.9/site-packages/optuna/study/_optimize.py", line 196, in _run_trial
    value_or_values = func(trial)
  File "/tmp/ipykernel_2574/2567411073.py", line 22, in <lambda>
    study.optimize(lambda trial: objective(trial, model_name, policy, sample_model_params, policy_kwargs, total_timesteps),
  File "/tmp/ipykernel_2574/3004778151.py", line 12, in objective
    trained_model = agent.train_model(model=model,
  File "/home/ubuntu/algorithmic_trading/CNN-DRL/agents2/agent.py", line 107, in train_model
    model = model.learn(
  File "/home/ubuntu/anaconda3/envs/py39/lib/python3.9/site-packages/stable_baselines3/ddpg/ddpg.py", line 123, in learn
    return super().learn(
  File "/home/ubuntu/anaconda3/envs/py39/lib/

KeyboardInterrupt: 

In [None]:
# Persist the study object to disk with joblib.
joblib.dump(study, "final_ddpg_study__.pkl")

In [None]:
#Get the best hyperparameters found by the study and show them next to the
#defaults from config.DDPG_PARAMS
print('Hyperparameters after tuning',study.best_params)
print('Hyperparameters before tuning',config.DDPG_PARAMS)

In [None]:
# Display the best trial of the study as the cell output.
study.best_trial

In [None]:
from stable_baselines3 import DDPG
# Reload the checkpoint saved for the study's best trial, attached to the training environment.
tuned_model_ddpg = DDPG.load(f'models/ddpg_{study.best_trial.number}.pth', env=e_train_gym)

In [None]:
#Trading period account value with tuned model
#DRL_prediction returns the account values and the actions as a pair
df_account_value_tuned, df_actions_tuned = DRLAgent.DRL_prediction(
    model=tuned_model_ddpg,
    environment = e_trade_gym)

In [None]:
#Backtesting with our tuned model
import os

print("==============Get Backtest Results===========")
now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')

# NOTE(review): backtest_stats is not imported in any of the visible cells - confirm where it comes from.
perf_stats_all_tuned = backtest_stats(account_value=df_account_value_tuned)
perf_stats_all_tuned = pd.DataFrame(perf_stats_all_tuned)
# to_csv does not create missing directories, so make sure the results folder exists first.
os.makedirs("./"+config.RESULTS_DIR, exist_ok=True)
perf_stats_all_tuned.to_csv("./"+config.RESULTS_DIR+"/perf_stats_all_tuned_"+now+'.csv')

In [None]:
#Now train with the untuned (default) hyperparameters
#Defaults come from config.DDPG_PARAMS
# NOTE(review): unlike the tuned run, no policy / policy_kwargs are passed here and
# total_timesteps is 50000 rather than 20000 - confirm this baseline is meant to differ.
non_tuned_model_ddpg = agent.get_model("ddpg",model_kwargs = config.DDPG_PARAMS )
trained_ddpg = agent.train_model(model=non_tuned_model_ddpg,
                             tb_log_name='ddpg',
                             total_timesteps=50000)

In [None]:
# Trading period account value with the untuned model
df_account_value, df_actions = DRLAgent.DRL_prediction(
    model=trained_ddpg,
    environment = e_trade_gym)

In [None]:
#Backtesting for the untuned hyperparameters
print("==============Get Backtest Results===========")
now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')

# NOTE(review): backtest_stats is not imported in any of the visible cells - confirm where it comes from.
perf_stats_all = backtest_stats(account_value=df_account_value)
perf_stats_all = pd.DataFrame(perf_stats_all)
# Saving the untuned statistics to CSV is currently disabled:
# perf_stats_all.to_csv("./"+config.RESULTS_DIR+"/perf_stats_all_"+now+'.csv')

In [None]:
#Plot the objective value (Sharpe ratio) of each trial over the course of the study
#With a larger number of trials the search can be optimized further
from optuna.visualization import plot_optimization_history
plot_optimization_history(study)

In [None]:
from optuna.visualization import plot_contour
from optuna.visualization import plot_edf
from optuna.visualization import plot_intermediate_values
from optuna.visualization import plot_optimization_history
from optuna.visualization import plot_parallel_coordinate
from optuna.visualization import plot_param_importances
from optuna.visualization import plot_slice

In [None]:
#Hyperparameter importance
#Only parameters sampled by the study can appear here (sample_ddpg_params samples
#buffer_size, learning_rate and batch_size); ent_coef is not part of that search space
plot_param_importances(study)

## Further works

1.   You can tune more critical hyperparameters
2.   Multi-objective hyperparameter optimization using Optuna. Here we can maximize Sharpe and simultaneously minimize Volatility in our account value to tune our hyperparameters



In [None]:
# Empirical distribution function plot of the study's objective values.
plot_edf(study)