In [9]:
%load_ext autoreload
%matplotlib inline
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [10]:
# import the needed modules

import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import os
import datetime

from finrl.apps import config
from data_loader import DataLoader
from trainer import Trainer

# Select matplotlib's non-interactive 'Agg' backend.
# NOTE(review): the first cell runs `%matplotlib inline`; switching to Agg here
# presumably stops figures from rendering inline in the notebook -- confirm
# that this is intended.
matplotlib.use('Agg')

In [11]:
def create_folders():
    """Create the working directories this notebook relies on.

    Ensures that the data, trained-model, TensorBoard-log and results
    directories named in ``config`` exist, relative to the current working
    directory. Calling it repeatedly is safe: directories that already exist
    are left untouched.

    Raises:
        FileExistsError: if one of the paths exists but is not a directory.
    """
    directories = (
        config.DATA_SAVE_DIR,
        config.TRAINED_MODEL_DIR,
        config.TENSORBOARD_LOG_DIR,
        config.RESULTS_DIR,
    )
    for directory in directories:
        # exist_ok=True replaces the separate os.path.exists() check, so there
        # is no window between "check" and "create" in which the directory can
        # appear and make makedirs() fail.
        os.makedirs("./" + directory, exist_ok=True)

In [12]:
"""
    The main script used for training the DRL model

    Can adjust the train and trade (used for backtesting) dates.

    Also it is possible to specify multiple stocks the DRL has to trade. 

"""

# First created the folders
create_folders()

# Adjust these dates accordingly
train_dates = ('2009-01-01','2016-01-01')
trade_dates = ('2016-01-01','2021-01-01')

# Can extend the ticker list with stocks listed in config.DOW_30_TICKER
ticker_list = ['AAPL']

# Initialize DataLoader object and preprocess the historical stock data, then split the data
dl = DataLoader(ticker_list=ticker_list, start_date=train_dates[0], end_date=trade_dates[1])
dl.preprocess()

df = dl.get_data()

dl.split_dataset(train_dates, trade_dates)

train_data = dl.get_train_data()
trade_data = dl.get_trade_data()


# Create a Trainer object which creates the Agent and train&trade environments
model = Trainer(train_data, trade_data)

model.set_environment('train')
model.set_environment('trade')
model.set_agent()

# Train the model
model.train()

# Save the model
model.save_model('agent.model')

print("Done")


Fetching data....
2009-01-01 2021-01-01 ['AAPL']
[*********************100%***********************]  1 of 1 completed
Shape of DataFrame:  (3022, 8)
Starting preprocessing...
Successfully added technical indicators
Splitting dataset into train and trade
Creating training environment
caching data
data cached!
Creating trading environment
caching data
data cached!
{'n_steps': 256, 'ent_coef': 0.0, 'learning_rate': 5e-06, 'batch_size': 1024, 'gamma': 0.99}


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=256 and n_envs=1)


EPISODE|STEPS|TERMINAL_REASON|CASH           |TOT_ASSETS     |TERMINAL_REWARD_unsc|GAINLOSS_PCT|CASH_PROPORTION
   1| 499|update         |$984,309       |$1,011,338     |0.00228%  |1.13383%  |97.33%    
EPISODE|STEPS|TERMINAL_REASON|CASH           |TOT_ASSETS     |TERMINAL_REWARD_unsc|GAINLOSS_PCT|CASH_PROPORTION




   1| 499|update         |$1,000,000     |$1,000,000     |0.00000%  |0.00000%  |100.00%   
   1| 999|update         |$1,000,000     |$1,000,000     |0.00000%  |0.00000%  |100.00%   
   1|1258|Last Date      |$1,000,000     |$1,000,000     |0.00000%  |0.00000%  |100.00%   
Eval num_timesteps=500, episode_reward=0.00 +/- 0.00
Episode length: 1259.00 +/- 0.00
New best mean reward!
   1| 999|update         |$830,778       |$1,113,648     |0.01139%  |11.36483% |74.60%    
   3| 499|update         |$927,195       |$1,033,037     |0.00663%  |3.30366%  |89.75%    
   3| 999|update         |$853,726       |$1,135,948     |0.01362%  |13.59482% |75.16%    
   3|1258|Last Date      |$815,669       |$1,416,433     |0.03310%  |41.64326% |57.59%    
Eval num_timesteps=1000, episode_reward=0.10 +/- 0.00
Episode length: 1259.00 +/- 0.00
New best mean reward!
   1|1140|Last Date      |$797,797       |$1,067,714     |0.00594%  |6.77137%  |74.72%    
   5| 499|update         |$784,608       |$1,098,014   

  35| 999|update         |$2,983         |$2,168,215     |0.07390%  |116.82150%|0.14%     
  35|1258|Last Date      |$2,968         |$4,174,604     |0.18622%  |317.46041%|0.07%     
Eval num_timesteps=9000, episode_reward=0.64 +/- 0.00
Episode length: 1259.00 +/- 0.00
New best mean reward!
   8| 999|update         |$260,195       |$1,658,156     |0.05879%  |65.81557% |15.69%    
   8|1149|Last Date      |$220,137       |$1,401,223     |0.02969%  |40.12228% |15.71%    
  37| 499|update         |$198,405       |$1,370,306     |0.05917%  |37.03061% |14.48%    
  37| 999|update         |$3,058         |$2,242,809     |0.07989%  |124.28091%|0.14%     
  37|1258|Last Date      |$3,058         |$4,318,254     |0.19536%  |331.82544%|0.07%     
Eval num_timesteps=9500, episode_reward=0.67 +/- 0.00
Episode length: 1259.00 +/- 0.00
New best mean reward!
   9| 499|update         |$501,473       |$1,274,516     |0.05512%  |27.45159% |39.35%    
  39| 499|update         |$122,327       |$1,413,935  

In [15]:
"""
    Backtest the performance of the trained model
"""

plot = model.backtest(trade_dates)

Starting backtesting
EPISODE|STEPS|TERMINAL_REASON|CASH           |TOT_ASSETS     |TERMINAL_REWARD_unsc|GAINLOSS_PCT|CASH_PROPORTION
   1| 499|update         |$494,057       |$1,212,972     |0.04277%  |21.29716% |40.73%    
   1| 999|update         |$52,649        |$1,869,935     |0.05497%  |86.99352% |2.82%     
   1|1091|CASH SHORTAGE  |$3,932         |$1,986,315     |0.05435%  |98.63154% |0.20%     
hit end!
Annual return          0.167758
Cumulative returns     0.957017
Annual volatility      0.229804
Sharpe ratio           0.790807
Calmar ratio           0.515796
Stability              0.775309
Max drawdown          -0.325241
Omega ratio            1.205945
Sortino ratio          1.140184
Skew                        NaN
Kurtosis                    NaN
Tail ratio             1.136321
Daily value at risk   -0.028231
dtype: float64
[*********************100%***********************]  1 of 1 completed
Shape of DataFrame:  (1260, 8)


Start date,2016-09-01,2016-09-01
End date,2020-12-31,2020-12-31
Total months,51,51
Unnamed: 0_level_3,Backtest,Unnamed: 2_level_3
Annual return,16.776%,
Cumulative returns,95.702%,
Annual volatility,22.98%,
Sharpe ratio,0.79,
Calmar ratio,0.52,
Stability,0.78,
Max drawdown,-32.524%,
Omega ratio,1.21,
Sortino ratio,1.14,
Skew,,


Worst drawdown periods,Net drawdown in %,Peak date,Valley date,Recovery date,Duration
0,32.52,2019-06-06,2019-09-04,2020-05-12,244.0
1,30.97,2020-10-12,2020-11-18,NaT,
2,8.61,2018-09-18,2018-10-09,2018-10-24,27.0
3,7.08,2018-11-07,2018-12-27,2019-01-04,43.0
4,4.75,2020-09-28,2020-10-01,2020-10-06,7.0


Stress Events,mean,min,max
New Normal,0.07%,-12.63%,11.76%
