<a href="https://colab.research.google.com/github/bateikoEd/Actor-Critic-and-Deep-Deterministic-Policy-Gradient-in-the-2022/blob/dev/run_experiments.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import libraries


In [None]:
!pip install git+https://github.com/AI4Finance-Foundation/FinRL.git

In [2]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import datetime

%matplotlib inline
from finrl.meta.preprocessor.yahoodownloader import YahooDownloader
from finrl.meta.preprocessor.preprocessors import FeatureEngineer, data_split
from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv
from finrl.agents.stablebaselines3.models import DRLAgent
from stable_baselines3.common.logger import configure
from finrl.meta.data_processor import DataProcessor

from finrl.plot import backtest_stats, backtest_plot, get_daily_return, get_baseline
from pprint import pprint

import sys
# Adds a sibling "../FinRL" directory to the module search path.
# NOTE(review): this runs after the first batch of `finrl` imports above, so it
# can only influence the `finrl` imports that follow — confirm it is still
# needed when FinRL is installed with pip.
sys.path.append("../FinRL")

import itertools


from finrl import config
from finrl import config_tickers
import os
from finrl.main import check_and_make_directories
from finrl.config import (
    DATA_SAVE_DIR,
    TRAINED_MODEL_DIR,
    TENSORBOARD_LOG_DIR,
    RESULTS_DIR,
    INDICATORS,
    TRAIN_START_DATE,
    TRAIN_END_DATE,
    TEST_START_DATE,
    TEST_END_DATE,
    TRADE_START_DATE,
    TRADE_END_DATE,
)

import json
from google.colab import drive


from collections import defaultdict

  'Module "zipline.assets" not found; multipliers will not be applied'


Mounted at /content/drive


# Import data

In [4]:
# Where the preprocessed input data is read from and where results are saved.
#
# On Colab, mount Drive and point both paths at it instead:
# drive.mount('/content/drive')
# path_saving_models = '/content/drive/MyDrive/test/'
# path_data_dicrectory = '/content/drive/MyDrive/test/'

# Local machine (default)
path_saving_models = 'saving_results/'
path_data_dicrectory = 'preprocessed_data/'

# Suffixes appended to the config keys of agent / environment parameters
SUFIX_AGENT = '_AGNT'
SUFIX_ENV = '_ENV'

# Ticker the strategy is compared against in the backtest
BASELINE_TICKET = "^NDX"

# These assignments replace the same-named values imported from finrl.config.
# NOTE(review): TRAIN_END_DATE equals TRADE_START_DATE; presumably data_split
# treats the end date as exclusive so the two splits do not overlap — confirm.
TRAIN_START_DATE, TRAIN_END_DATE = '2022-01-01', '2022-06-01'
TRADE_START_DATE, TRADE_END_DATE = '2022-06-01', '2022-09-28'

# The first CSV column is discarded (presumably a saved index column — confirm)
raw_frame = pd.read_csv(f"{path_data_dicrectory}qqq.csv")
processed_full = raw_frame.iloc[:, 1:]

train = data_split(processed_full, TRAIN_START_DATE, TRAIN_END_DATE)
trade = data_split(processed_full, TRADE_START_DATE, TRADE_END_DATE)
for split in (train, trade):
  print(len(split))

print(INDICATORS)

# State vector size: 1 scalar, plus 2 values per ticker, plus one value per
# technical indicator per ticker.
stock_dimension = len(train["tic"].unique())
state_space = 1 + stock_dimension * (2 + len(INDICATORS))
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")


8240
6480
['macd', 'boll_ub', 'boll_lb', 'rsi_30', 'cci_30', 'dx_30', 'close_30_sma', 'close_60_sma']
Stock Dimension: 80, State Space: 801


# Set environment

In [5]:
# Per-ticker transaction fees and initial holdings.
# The two fee lists are built separately on purpose: a chained assignment
# (a = b = [...]) would make both names point at the same list object, so an
# in-place edit of one fee would silently change the other.
buy_cost_list = [0.001] * stock_dimension
sell_cost_list = [0.001] * stock_dimension
num_stock_shares = [0] * stock_dimension

# Keyword arguments shared by every StockTradingEnv built in this notebook
env_kwargs = {
    "hmax": 100,
    "initial_amount": 100000,
    "num_stock_shares": num_stock_shares,
    "buy_cost_pct": buy_cost_list,
    "sell_cost_pct": sell_cost_list,
    "state_space": state_space,
    "stock_dim": stock_dimension,
    "tech_indicator_list": INDICATORS,
    "action_space": stock_dimension,
    "reward_scaling": 1e-4
}

# Training environment; get_sb_env() returns a pair of which only the first
# element is kept (presumably the wrapped env Stable-Baselines3 expects — confirm).
e_train_gym = StockTradingEnv(df = train, **env_kwargs)
env_train, _ = e_train_gym.get_sb_env()

# Compact copy of the environment settings for logging: the per-ticker lists
# hold the same value for every ticker, so only their first element is kept.
PER_TICKER_PARAMS = ('num_stock_shares', 'buy_cost_pct', 'sell_cost_pct')
dict_params_env = {
    param: (value[0] if param in PER_TICKER_PARAMS else value)
    for param, value in env_kwargs.items()
}

# Set agent parameters


In [23]:
AGENT_NAME = 'sac' # sac, ppo, ddpg, td3, a2c

total_timesteps = 100000

# Hyper-parameter overrides per algorithm; agents that are not listed here
# get an empty dict and are built without explicit model_kwargs below.
_AGENT_PRESETS = {
  'sac': {
    "batch_size": 128,
    "buffer_size": 100000,
    "learning_rate": 0.0001,
    "learning_starts": 100,
    "ent_coef": "auto_0.1",
  },
  'ppo': {
    "n_steps": 2048,
    "ent_coef": 0.01,
    "learning_rate": 0.00025,
    "batch_size": 128,
  },
  'td3': {
    "batch_size": 100,
    "buffer_size": 1000000,
    "learning_rate": 0.001,
  },
}
PARAMS = _AGENT_PRESETS.get(AGENT_NAME, dict())

# Hyper-parameters recorded in the run config; -1 marks "not set for this agent"
list_params = ['batch_size', 'buffer_size',
               'learning_rate', 'learning_starts',
               'ent_coef', 'n_steps']

dict_paramas_agent_main = defaultdict(
  float,
  ((key, PARAMS.get(key, -1)) for key in list_params),
)
dict_paramas_agent_main.update(name=AGENT_NAME, timestemps=total_timesteps)

# Flat run configuration: agent settings, then environment settings, then dates
dict_config = {
  key + SUFIX_AGENT: setting
  for key, setting in dict_paramas_agent_main.items()
}
dict_config.update(
  (key + SUFIX_ENV, setting) for key, setting in dict_params_env.items()
)
dict_config.update(
  TRAIN_START_DATE=TRAIN_START_DATE,
  TRAIN_END_DATE=TRAIN_END_DATE,
  TRADE_START_DATE=TRADE_START_DATE,
  TRADE_END_DATE=TRADE_END_DATE,
)

# Build the agent on the training environment; model_kwargs is only passed
# when there are overrides for the chosen algorithm.
agent = DRLAgent(env = env_train)

if PARAMS:
  model = agent.get_model(AGENT_NAME, model_kwargs = PARAMS)
else:
  model = agent.get_model(AGENT_NAME)

print(f"config:\t{dict_config}")
print(f"dict_params_env:\t{dict_params_env}")


{'batch_size': 128, 'buffer_size': 100000, 'learning_rate': 0.0001, 'learning_starts': 100, 'ent_coef': 'auto_0.1'}
Using cpu device
config:	{'batch_size_AGNT': 128, 'buffer_size_AGNT': 100000, 'learning_rate_AGNT': 0.0001, 'learning_starts_AGNT': 100, 'ent_coef_AGNT': 'auto_0.1', 'n_steps_AGNT': -1, 'name_AGNT': 'sac', 'timestemps_AGNT': 10, 'hmax_ENV': 100, 'initial_amount_ENV': 100000, 'num_stock_shares_ENV': 0, 'buy_cost_pct_ENV': 0.001, 'sell_cost_pct_ENV': 0.001, 'state_space_ENV': 801, 'stock_dim_ENV': 80, 'tech_indicator_list_ENV': ['macd', 'boll_ub', 'boll_lb', 'rsi_30', 'cci_30', 'dx_30', 'close_30_sma', 'close_60_sma'], 'action_space_ENV': 80, 'reward_scaling_ENV': 0.0001, 'TRAIN_START_DATE': '2022-01-01', 'TRAIN_END_DATE': '2022-06-01', 'TRADE_START_DATE': '2022-06-01', 'TRADE_END_DATE': '2022-09-28'}
dict_params_env:	{'hmax': 100, 'initial_amount': 100000, 'num_stock_shares': 0, 'buy_cost_pct': 0.001, 'sell_cost_pct': 0.001, 'state_space': 801, 'stock_dim': 80, 'tech_indic

# Run experiment

In [None]:
# Manual run counter; it is part of the output file names, so change it to
# avoid overwriting the CSV files of an earlier run.
NUMBER = '_' + str(5)

RUN_NAME = AGENT_NAME + BASELINE_TICKET + NUMBER


def _metrics_to_dict(metrics, suffix):
  """Turn a pandas Series of statistics into a flat dict.

  Index labels are lower-cased, their whitespace is collapsed to single
  underscores, and `suffix` is appended to every key.
  """
  return {
      '_'.join(name.lower().split()) + suffix: value
      for name, value in zip(metrics.index, metrics.values)
  }


# Make sure the output location exists *before* the long training run, so the
# results are not lost to a missing directory when they are written below.
output_dir = os.path.dirname(path_saving_models)
if output_dir:
  os.makedirs(output_dir, exist_ok=True)

# ------------------------- training

print("Training ...")
trained_model = agent.train_model(model=model,
                                  tb_log_name=AGENT_NAME,
                                  total_timesteps=total_timesteps)

# ------------------------- trading

# Risk threshold: the 0.996 quantile of the in-sample (training period) VIX,
# taken over one row per date.
# NOTE(review): the date filter compares against 'YYYY-MM-DD' strings, which
# assumes processed_full.date holds ISO date strings — confirm.
print("Processing ...")
data_risk_indicator = processed_full[(processed_full.date<TRAIN_END_DATE) & (processed_full.date>=TRAIN_START_DATE)]
insample_risk_indicator = data_risk_indicator.drop_duplicates(subset=['date'])

turbulence_threshold = insample_risk_indicator.vix.quantile(0.996)
risk_indicator_col = 'vix'

# Trading environment over the out-of-sample period
e_trade_gym = StockTradingEnv(df = trade,
                              turbulence_threshold = turbulence_threshold,
                              risk_indicator_col = risk_indicator_col,
                              **env_kwargs)

# Run the trained model through the trading environment
print("Predictions ...")
df_account_value, df_actions = DRLAgent.DRL_prediction(model=trained_model, environment = e_trade_gym)

# Persist account values and actions for later plotting.
# NOTE(review): RUN_NAME already contains BASELINE_TICKET, so the ticker
# appears twice in these file names; kept as-is so existing readers of the
# files keep working.
df_account_value.to_csv(
    f"{path_saving_models}df_accnt_val{RUN_NAME}{BASELINE_TICKET}.csv",
    index=False)

df_actions.to_csv(
    f"{path_saving_models}df_actions{RUN_NAME}{BASELINE_TICKET}.csv",
    index=False)

# ------------------------- backtest statistics

print("==============Get Backtest Results===========")
# Timestamp of this run (not used further inside this cell)
now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')

perf_stats_all = backtest_stats(account_value=df_account_value)
perf_stats_all = pd.DataFrame(perf_stats_all)

# Strategy metrics, keys suffixed with '_bc'
dict_trading_backtest = _metrics_to_dict(perf_stats_all.iloc[:, 0], '_bc')

# ------------------------- baseline statistics

# First and last traded dates, taken by position so the lookup does not
# depend on the frame having a 0..n-1 index.
backtest_start = df_account_value['date'].iloc[0]
backtest_end = df_account_value['date'].iloc[-1]

print("==============Get Baseline Stats===========")
baseline_df = get_baseline(
    ticker=BASELINE_TICKET,
    start=backtest_start,
    end=backtest_end)

stats = backtest_stats(baseline_df, value_col_name = 'close')

# Baseline metrics, keys suffixed with '_bs'; the ticker is recorded first
dict_trading_baseline = {
    'ticker_bs': BASELINE_TICKET,
    **_metrics_to_dict(stats, '_bs'),
}

# ------------------------- plots

# The inline matplotlib backend is already enabled in the imports cell.
print("==============Compare to Ticker===========")
backtest_plot(df_account_value,
              baseline_ticker = BASELINE_TICKET,
              baseline_start = backtest_start,
              baseline_end = backtest_end)