## Set Lab Black

In [1]:
%load_ext lab_black

In [2]:
import warnings

# Ignore every warning category for the rest of the session.
# NOTE(review): this also hides deprecation warnings raised by the libraries used in later cells — consider narrowing the filter.
warnings.filterwarnings("ignore")

In [3]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import datetime

%matplotlib inline
from finrl.config_tickers import DOW_30_TICKER
from finrl.meta.preprocessor.yahoodownloader import YahooDownloader
from finrl.meta.preprocessor.preprocessors import FeatureEngineer, data_split
from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv
from finrl.agents.stablebaselines3.models import DRLAgent, DRLEnsembleAgent
from finrl.plot import backtest_stats, backtest_plot, get_daily_return, get_baseline

from pprint import pprint
import sys

# Add a sibling FinRL-Library checkout to the module search path.
# NOTE(review): this runs after the finrl imports earlier in this cell, so it cannot affect how they were resolved — confirm whether it is still needed.
sys.path.append("../FinRL-Library")
import itertools

## Create Folders

In [4]:
import os
from finrl.main import check_and_make_directories
from finrl.config import (
    DATA_SAVE_DIR,
    TRAINED_MODEL_DIR,
    TENSORBOARD_LOG_DIR,
    RESULTS_DIR,
    INDICATORS,
    TRAIN_START_DATE,
    TRAIN_END_DATE,
    TEST_START_DATE,
    TEST_END_DATE,
    TRADE_START_DATE,
    TRADE_END_DATE,
)

# Hand the four output locations from finrl.config (data, trained models, TensorBoard logs,
# results) to FinRL's directory helper; presumably it creates any that are missing — verify against finrl.main.
check_and_make_directories(
    [DATA_SAVE_DIR, TRAINED_MODEL_DIR, TENSORBOARD_LOG_DIR, RESULTS_DIR]
)

## Download Data

In [5]:
# Show the ticker symbols that are passed to the downloader in the next cell.
print(DOW_30_TICKER)

['AXP', 'AMGN', 'AAPL', 'BA', 'CAT', 'CSCO', 'CVX', 'GS', 'HD', 'HON', 'IBM', 'INTC', 'JNJ', 'KO', 'JPM', 'MCD', 'MMM', 'MRK', 'MSFT', 'NKE', 'PG', 'TRV', 'UNH', 'CRM', 'VZ', 'V', 'WBA', 'WMT', 'DIS', 'DOW']


In [6]:
# Study window. These assignments override the values of the same names
# imported from finrl.config earlier in the notebook.
TRAIN_START_DATE, TRAIN_END_DATE = "2009-04-01", "2021-01-01"
TEST_START_DATE, TEST_END_DATE = "2021-01-01", "2022-06-01"

# A single download covering the start of training through the end of testing.
downloader = YahooDownloader(
    start_date=TRAIN_START_DATE,
    end_date=TEST_END_DATE,
    ticker_list=DOW_30_TICKER,
)
df = downloader.fetch_data()

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

## Preprocessing Data

In [7]:
# Feature-engineering settings: add the technical indicators listed in INDICATORS
# and a turbulence index; no user-defined features are requested.
fs = FeatureEngineer(
    use_technical_indicator=True,
    tech_indicator_list=INDICATORS,
    use_turbulence=True,
    user_defined_feature=False,
)

In [8]:
# Run the feature pipeline, then zero out every non-finite value.
# fillna/replace each return a new frame, so no explicit copy is needed.
# Both +inf and -inf are replaced: the earlier form only caught +inf and would
# have let a negative infinity through to the trading environment.
processed = fs.preprocess_data(df).fillna(0).replace([np.inf, -np.inf], 0)

Successfully added technical indicators
Successfully added turbulence index


In [9]:
# Preview the first rows of the engineered feature frame.
processed.head()

Unnamed: 0,date,open,high,low,close,volume,tic,day,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma,turbulence
0,2009-04-01,3.7175,3.892857,3.710357,3.308903,589372000.0,AAPL,2,0.0,3.54317,3.197019,100.0,66.666667,100.0,3.308903,3.308903,0.0
1,2009-04-01,48.779999,48.93,47.099998,36.22839,10850100.0,AMGN,2,0.0,3.54317,3.197019,100.0,66.666667,100.0,36.22839,36.22839,0.0
2,2009-04-01,13.34,14.64,13.08,11.772775,27701800.0,AXP,2,0.0,3.54317,3.197019,100.0,66.666667,100.0,11.772775,11.772775,0.0
3,2009-04-01,34.52,35.599998,34.209999,26.850748,9288800.0,BA,2,0.0,3.54317,3.197019,100.0,66.666667,100.0,26.850748,26.850748,0.0
4,2009-04-01,27.5,29.52,27.440001,19.820396,15308300.0,CAT,2,0.0,3.54317,3.197019,100.0,66.666667,100.0,19.820396,19.820396,0.0


## Design Environment

The action space describes the actions through which the agent interacts with the environment. Normally, an action a takes one of three values, {-1, 0, 1}, where -1, 0, and 1 represent selling, holding, and buying one share, respectively. An action can also be carried out on multiple shares: we use an action space {-k, …, -1, 0, 1, …, k}, where k denotes the number of shares to buy and -k denotes the number of shares to sell. For example, "Buy 10 shares of AAPL" and "Sell 10 shares of AAPL" correspond to 10 and -10, respectively. The continuous action space needs to be normalized to [-1, 1], since the policy is defined on a Gaussian distribution, which needs to be normalized and symmetric.

In [10]:
# Number of distinct tickers present in the processed frame.
stock_dimension = len(processed["tic"].unique())

# State vector length: one leading slot, plus two slots and one slot per
# indicator for every ticker.
slots_per_stock = 2 + len(INDICATORS)
state_space = 1 + stock_dimension * slots_per_stock

print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

Stock Dimension: 29, State Space: 291


In [11]:
# Environment settings, later unpacked into DRLEnsembleAgent via **env_kwargs.
env_kwargs = dict(
    hmax=100,
    initial_amount=1_000_000,
    buy_cost_pct=0.001,
    sell_cost_pct=0.001,
    state_space=state_space,
    stock_dim=stock_dimension,
    tech_indicator_list=INDICATORS,
    action_space=stock_dimension,
    reward_scaling=1e-4,
    print_verbosity=5,
)

## Implement DRL Algorithms

In [12]:
# Window lengths, in days:
#   rebalance_window  - number of days between retrainings of the model
#   validation_window - number of days used for validation and for trading
#                       (with 63, the validation and trading periods are both 63 days)
rebalance_window = 63
validation_window = 63

train_period = (TRAIN_START_DATE, TRAIN_END_DATE)
val_test_period = (TEST_START_DATE, TEST_END_DATE)

ensemble_agent = DRLEnsembleAgent(
    df=processed,
    train_period=train_period,
    val_test_period=val_test_period,
    rebalance_window=rebalance_window,
    validation_window=validation_window,
    **env_kwargs,
)

In [13]:
# Per-algorithm hyperparameters handed to the ensemble run.
A2C_model_kwargs = dict(n_steps=5, ent_coef=0.005, learning_rate=0.0007)

PPO_model_kwargs = dict(
    ent_coef=0.01,
    n_steps=2048,
    learning_rate=0.00025,
    batch_size=128,
)

# No "action_noise" entry is set for DDPG (an "ornstein_uhlenbeck" setting was left disabled).
DDPG_model_kwargs = dict(
    buffer_size=10_000,
    learning_rate=0.0005,
    batch_size=64,
)

# Timestep budget for each algorithm, keyed by its lowercase name.
timesteps_dict = dict(a2c=10_000, ppo=10_000, ddpg=10_000)

In [None]:
# Run the ensemble strategy with the A2C, PPO and DDPG settings and the
# timestep budgets defined above; the returned summary is kept for inspection.
df_summary = ensemble_agent.run_ensemble_strategy(
    A2C_model_kwargs, PPO_model_kwargs, DDPG_model_kwargs, timesteps_dict
)

turbulence_threshold:  203.40334064436135
{'n_steps': 5, 'ent_coef': 0.005, 'learning_rate': 0.0007}
Using cuda device
Logging to tensorboard_log/a2c/a2c_126_2
--------------------------------------
| time/                 |            |
|    fps                | 48         |
|    iterations         | 100        |
|    time_elapsed       | 10         |
|    total_timesteps    | 500        |
| train/                |            |
|    entropy_loss       | -41.2      |
|    explained_variance | 0.159      |
|    learning_rate      | 0.0007     |
|    n_updates          | 99         |
|    policy_loss        | -30.9      |
|    reward             | -0.6531783 |
|    std                | 1          |
|    value_loss         | 4.68       |
--------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 54        |
|    iterations         | 200       |
|    time_elapsed       | 18        |
|    total_timesteps    | 

In [16]:
# Display the summary frame returned by the ensemble run.
df_summary

Unnamed: 0,Iter,Val Start,Val End,Model Used,A2C Sharpe,PPO Sharpe,DDPG Sharpe
0,126,2021-01-04,2021-04-06,PPO,0.111138,0.38092,0.335368
1,189,2021-04-06,2021-07-06,DDPG,0.098565,0.147118,0.234092
2,252,2021-07-06,2021-10-04,DDPG,-0.100266,-0.136928,0.03668
3,315,2021-10-04,2022-01-03,DDPG,0.182045,0.097331,0.182273


## Backtest Our Strategy

In [17]:
# Distinct dates strictly after TEST_START_DATE and up to and including TEST_END_DATE.
in_test_window = (processed["date"] > TEST_START_DATE) & (
    processed["date"] <= TEST_END_DATE
)
unique_trade_date = processed.loc[in_test_window, "date"].unique()

In [19]:
df_trade_date = pd.DataFrame({"datadate": unique_trade_date})

# Account-value CSVs are read back from results/, one per iteration number:
# the first is rebalance_window + validation_window, then every
# rebalance_window after that, up to the number of trade dates.
trade_iterations = range(
    rebalance_window + validation_window, len(unique_trade_date) + 1, rebalance_window
)

# DataFrame.append was removed in pandas 2.0 and re-copied the whole frame on
# every pass; collect the pieces first and concatenate once instead.
account_value_parts = [
    pd.read_csv("results/account_value_trade_{}_{}.csv".format("ensemble", i))
    for i in trade_iterations
]
df_account_value = pd.concat(account_value_parts, ignore_index=True)

# Annualised Sharpe ratio from day-over-day changes in account value
# (252 periods per year, no risk-free rate subtracted).
daily_returns = df_account_value.account_value.pct_change(1)
sharpe = (252**0.5) * daily_returns.mean() / daily_returns.std()
print("Sharpe Ratio: ", sharpe)

# Attach trade dates row-by-row, skipping the first validation_window dates.
df_account_value = df_account_value.join(
    df_trade_date[validation_window:].reset_index(drop=True)
)

Sharpe Ratio:  -0.02782960361980809


In [20]:
# Preview the combined account values with the joined trade dates.
df_account_value.head()

Unnamed: 0,account_value,date,daily_return,datadate
0,1000000.0,2021-04-06,,2021-04-06
1,1000181.0,2021-04-07,0.000181,2021-04-07
2,999842.7,2021-04-08,-0.000338,2021-04-08
3,1001747.0,2021-04-09,0.001905,2021-04-09
4,1000244.0,2021-04-12,-0.001501,2021-04-12


In [26]:
# Plot account value against the joined trade date.
df_account_value.set_index("datadate").account_value.plot()

<AxesSubplot:xlabel='datadate'>

In [27]:
print("==============Get Backtest Results===========")

# Timestamp for this run, formatted like 20220601-13h45.
now = datetime.datetime.now().strftime("%Y%m%d-%Hh%M")

# Compute the backtest statistics and hold them as a DataFrame.
perf_stats_all = pd.DataFrame(backtest_stats(account_value=df_account_value))

Annual return         -0.014237
Cumulative returns    -0.014237
Annual volatility      0.144326
Sharpe ratio          -0.027830
Calmar ratio          -0.106197
Stability              0.163804
Max drawdown          -0.134058
Omega ratio            0.995412
Sortino ratio         -0.039131
Skew                        NaN
Kurtosis                    NaN
Tail ratio             0.977878
Daily value at risk   -0.018199
dtype: float64
