# Part 1. Install Packages

In [60]:
## Install required packages (uncomment these lines on a fresh environment).
## In a notebook, prefer `%pip install ...` over `!pip install ...` so the
## packages are installed into the environment of the running kernel.
#!pip install swig
#!pip install wrds
#!pip install pyportfolioopt
## Install the FinRL library itself (from the GitHub repository)
## !pip install git+https://github.com/AI4Finance-Foundation/FinRL.git


In [61]:
import pandas as pd
import numpy as np
import datetime
import gymnasium
import stable_baselines3
import stockstats
import alpaca_trade_api
import exchange_calendars

import yfinance as yf

from finrl.meta.preprocessor.yahoodownloader import YahooDownloader
from finrl.meta.preprocessor.preprocessors import FeatureEngineer, data_split
from finrl import config_tickers
from finrl.config import INDICATORS, TRAINED_MODEL_DIR, RESULTS_DIR


import itertools

# Part 2. Fetch data

**OHLCV**: The downloaded data are in OHLCV form, corresponding to **open, high, low, close, volume**, respectively. OHLCV is important because it contains most of the numerical information about a stock as a time series. From OHLCV, traders can derive further judgements and predictions, such as momentum, market interest, and market trends.

In [62]:
# Date windows for the experiment, as 'YYYY-MM-DD' strings.
# The training window is used to fit the agents; the trading window is held out.
# NOTE(review): TRAIN_END_DATE and TRADE_START_DATE are the same day. The two
# splits only stay disjoint if data_split treats the end date as exclusive —
# confirm against the FinRL implementation.
TRAIN_START_DATE = '2021-01-01'
TRAIN_END_DATE = '2023-07-01'
TRADE_START_DATE = '2023-07-01'
TRADE_END_DATE = '2024-09-15'

# Display the ticker universe (Dow 30 list shipped with FinRL) used below.
config_tickers.DOW_30_TICKER


['AXP',
 'AMGN',
 'AAPL',
 'BA',
 'CAT',
 'CSCO',
 'CVX',
 'GS',
 'HD',
 'HON',
 'IBM',
 'INTC',
 'JNJ',
 'KO',
 'JPM',
 'MCD',
 'MMM',
 'MRK',
 'MSFT',
 'NKE',
 'PG',
 'TRV',
 'UNH',
 'CRM',
 'VZ',
 'V',
 'WBA',
 'WMT',
 'DIS',
 'DOW']

In [63]:
# Optional sanity check (disabled): download a single ticker directly with yfinance.
#aapl_df_yf = yf.download(tickers = "aapl", start=TRAIN_START_DATE, end=TRADE_END_DATE)
#aapl_df_yf.head()

# Download data for every Dow 30 ticker over the whole span, from the start of
# training to the end of trading, so both splits are later cut from one frame.
df_raw = YahooDownloader(start_date = TRAIN_START_DATE,
                     end_date = TRADE_END_DATE,
                     ticker_list = config_tickers.DOW_30_TICKER).fetch_data()


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

Shape of DataFrame:  (27900, 8)


In [64]:
# Preview the first rows of the raw download (one row per date and ticker).
df_raw.head()

Unnamed: 0,date,open,high,low,close,volume,tic,day
0,2021-01-04,133.520004,133.610001,126.760002,126.683434,143301900,AAPL,0
1,2021-01-04,231.25,231.25,223.669998,201.544556,3088200,AMGN,0
2,2021-01-04,121.300003,121.800003,116.849998,112.457108,3472100,AXP,0
3,2021-01-04,210.0,210.199997,202.490005,202.720001,21225600,BA,0
4,2021-01-04,183.0,185.979996,180.25,168.735245,4078300,CAT,0


# Part 3: Preprocess Data
We need to check for missing data and do feature engineering to convert the data point into a state.
* **Adding technical indicators**. In practical trading, various information needs to be taken into account, such as historical prices, current holding shares, technical indicators, etc. Here, we demonstrate two trend-following technical indicators: MACD and RSI.
* **Adding turbulence index**. Risk aversion reflects whether an investor prefers to protect the capital. It also influences one's trading strategy when facing different levels of market volatility. To control the risk in a worst-case scenario, such as the financial crisis of 2007–2008, FinRL employs the turbulence index, which measures extreme fluctuations of asset prices.

In [65]:
# Configure the feature-engineering step: technical indicators from the
# INDICATORS list, plus the VIX and turbulence columns; no user-defined features.
fe = FeatureEngineer(use_technical_indicator=True,
                     tech_indicator_list = INDICATORS,
                     use_vix=True,
                     use_turbulence=True,
                     user_defined_feature = False)


In [66]:
# Run the configured feature engineering on the raw download.
# The raw frame is kept in df_raw; the enriched frame is stored separately.
processed = fe.preprocess_data(df_raw)

[*********************100%***********************]  1 of 1 completed

Successfully added technical indicators
Shape of DataFrame:  (929, 8)
Successfully added vix





Successfully added turbulence index


In [67]:
# Build a complete (date, ticker) grid so that every ticker has a row on
# every date that appears in `processed`.
list_ticker = processed["tic"].unique().tolist()
list_date = list(
    pd.date_range(processed["date"].min(), processed["date"].max()).astype(str)
)
combination = list(itertools.product(list_date, list_ticker))

processed_full = (
    pd.DataFrame(combination, columns=["date", "tic"])
    .merge(processed, on=["date", "tic"], how="left")
    # The calendar range above also contains days that are absent from
    # `processed`; keep only the dates that actually occur in it.
    .loc[lambda grid: grid["date"].isin(processed["date"])]
    .sort_values(["date", "tic"])
    # Any (date, ticker) pair that had no data is filled with zeros.
    .fillna(0)
)
processed_full.head()


Unnamed: 0,date,tic,open,high,low,close,volume,day,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma,vix,turbulence
0,2021-01-04,AAPL,133.520004,133.610001,126.760002,126.683434,143301900.0,0.0,0.0,129.68168,125.251494,100.0,66.666667,100.0,126.683434,126.683434,26.969999,0.0
1,2021-01-04,AMGN,231.25,231.25,223.669998,201.544556,3088200.0,0.0,0.0,129.68168,125.251494,100.0,66.666667,100.0,201.544556,201.544556,26.969999,0.0
2,2021-01-04,AXP,121.300003,121.800003,116.849998,112.457108,3472100.0,0.0,0.0,129.68168,125.251494,100.0,66.666667,100.0,112.457108,112.457108,26.969999,0.0
3,2021-01-04,BA,210.0,210.199997,202.490005,202.720001,21225600.0,0.0,0.0,129.68168,125.251494,100.0,66.666667,100.0,202.720001,202.720001,26.969999,0.0
4,2021-01-04,CAT,183.0,185.979996,180.25,168.735245,4078300.0,0.0,0.0,129.68168,125.251494,100.0,66.666667,100.0,168.735245,168.735245,26.969999,0.0


In [68]:
# Part 4: Save the Data

### Split the data for training and trading

In [69]:
# Cut the processed frame into the training window and the trading window,
# then report how many rows each split contains.
train, trade = (
    data_split(processed_full, window_start, window_end)
    for window_start, window_end in (
        (TRAIN_START_DATE, TRAIN_END_DATE),
        (TRADE_START_DATE, TRADE_END_DATE),
    )
)
print(len(train), len(trade), sep="\n")

18810
9060


### Save data to csv file


In [70]:
# Persist both splits to the working directory. The DataFrame index is written
# as the first (unnamed) CSV column and is restored from that column when the
# training file is read back later in this notebook.
train.to_csv('train_data.csv')
trade.to_csv('trade_data.csv')

# Part 4. Build A Market Environment in OpenAI Gym-style

The core elements of reinforcement learning are the **agent** and the **environment**. You can understand RL as the following process:

The agent acts in a world, which is the environment. It observes its current condition as a **state** and is allowed to take certain **actions**. After the agent executes an action, it arrives at a new state. At the same time, the environment gives the agent feedback called a **reward**, a numerical signal that tells how good or bad the new state is. As shown in the figure above, the agent and the environment keep repeating this interaction.

The goal of the agent is to collect as much cumulative reward as possible. Reinforcement learning is the method by which the agent learns to improve its behavior and achieve that goal.

To achieve this in Python, we follow the OpenAI Gym style and build the stock data into an environment.

The state, action, and reward are specified as follows:

* **State s**: The state space represents an agent's perception of the market environment. Just like a human trader analyzing various information, here our agent passively observes the price data and technical indicators based on the past data. It will learn by interacting with the market environment (usually by replaying historical data).

* **Action a**: The action space includes the actions that an agent is allowed to take at each state. For example, a ∈ {−1, 0, 1}, where −1, 0, and 1 represent
selling, holding, and buying, respectively. When an action operates on multiple shares, a ∈ {−k, ..., −1, 0, 1, ..., k}; e.g., "Buy 10 shares of AAPL" and "Sell 10 shares of AAPL" are 10 and −10, respectively.

* **Reward function r(s, a, s′)**: Reward is an incentive for an agent to learn a better policy. For example, it can be the change of the portfolio value when taking a at state s and arriving at new state s',  i.e., r(s, a, s′) = v′ − v, where v′ and v represent the portfolio values at state s′ and s, respectively


**Market environment**: 30 constituent stocks of Dow Jones Industrial Average (DJIA) index. Accessed at the starting date of the testing period.

## Read data

We first read the .csv file of our training data into dataframe.

In [71]:
# Load the training split that was written to disk earlier in this notebook.
train = pd.read_csv('train_data.csv')

# If you are not using the data generated in part 1 of this tutorial, make sure
# its columns and index are already in the form the environment expects; in
# that case you can comment out and skip the following statement.
# It promotes the first CSV column (the saved index) back to the index and
# gives that index an empty name.
train = train.set_index(train.columns[0]).rename_axis('')


## Construct the environment

Calculate and specify the parameters we need for constructing the environment.

In [72]:
from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv

# One slot per distinct ticker in the training frame.
stock_dimension = len(train.tic.unique())
# Size of the observation vector: 1 scalar, 2 values per stock, and one value
# per (indicator, stock) pair.
# NOTE(review): presumably cash balance + price and holdings per stock +
# indicators — confirm against StockTradingEnv.
state_space = 1 + 2*stock_dimension + len(INDICATORS)*stock_dimension
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

# Per-stock cost rates. The two lists are built separately on purpose:
# a chained assignment (a = b = [...]) would bind both names to the same list
# object, so changing a buy cost in place would silently change the sell cost.
buy_cost_list = [0.001] * stock_dimension
sell_cost_list = [0.001] * stock_dimension
# Number of shares of each stock passed to the environment as the starting position.
num_stock_shares = [0] * stock_dimension

# Keyword arguments for StockTradingEnv; kept in a dict so the same settings
# can be passed to another environment instance.
env_kwargs = {
    "hmax": 100,
    "initial_amount": 1000000,
    "num_stock_shares": num_stock_shares,
    "buy_cost_pct": buy_cost_list,
    "sell_cost_pct": sell_cost_list,
    "state_space": state_space,
    "stock_dim": stock_dimension,
    "tech_indicator_list": INDICATORS,
    "action_space": stock_dimension,
    "reward_scaling": 1e-4
}

# Training environment built on the training split.
e_train_gym = StockTradingEnv(df = train, **env_kwargs)

Stock Dimension: 30, State Space: 301


## Environment for training

In [73]:
# Get the Stable Baselines 3 compatible environment from the training
# environment; the second return value is not needed here and is discarded.
env_train, _ = e_train_gym.get_sb_env()
# Show which wrapper class was returned.
print(type(env_train))

<class 'stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv'>


# Part 5. Train DRL Agents
* Here, the DRL algorithms are from **[Stable Baselines 3](https://stable-baselines3.readthedocs.io/en/master/)**, a library that implements popular DRL algorithms in PyTorch and succeeds the original Stable Baselines.
* Users are also encouraged to try **[ElegantRL](https://github.com/AI4Finance-Foundation/ElegantRL)** and **[Ray RLlib](https://github.com/ray-project/ray)**.

In [74]:
from finrl.agents.stablebaselines3.models import DRLAgent

# Per-algorithm switches. When a flag is True, the cells below attach a logger
# to that agent, train it, and save it; when it is False, training is skipped
# and the corresponding trained_* variable is set to None.
if_using_a2c = True
if_using_ddpg = True
if_using_ppo = True
if_using_td3 = True
if_using_sac = True

## Agent Training: 5 algorithms (A2C, DDPG, PPO, TD3, SAC)

### Agent 1: A2C

In [75]:
from stable_baselines3.common.logger import configure

# Create the A2C model with the library's default hyperparameters.
# NOTE(review): no random seed is set in this notebook, so training results
# will differ between runs.
agent = DRLAgent(env=env_train)
model_a2c = agent.get_model("a2c")

if if_using_a2c:
    # Send training logs to stdout, a CSV file, and TensorBoard under the
    # results directory.
    tmp_path = RESULTS_DIR + '/a2c'
    new_logger_a2c = configure(tmp_path, ["stdout", "csv", "tensorboard"])
    model_a2c.set_logger(new_logger_a2c)


{'n_steps': 5, 'ent_coef': 0.01, 'learning_rate': 0.0007}
Using cpu device
Logging to results/a2c


In [76]:
# Train A2C only when it is enabled; otherwise leave a None placeholder.
if if_using_a2c:
    trained_a2c = agent.train_model(
        model=model_a2c,
        tb_log_name='a2c',
        total_timesteps=50000,
    )
else:
    trained_a2c = None

--------------------------------------
| time/                 |            |
|    fps                | 491        |
|    iterations         | 100        |
|    time_elapsed       | 1          |
|    total_timesteps    | 500        |
| train/                |            |
|    entropy_loss       | -42.4      |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 99         |
|    policy_loss        | -62.4      |
|    reward             | -1.1268827 |
|    std                | 0.996      |
|    value_loss         | 4.17       |
--------------------------------------
--------------------------------------
| time/                 |            |
|    fps                | 517        |
|    iterations         | 200        |
|    time_elapsed       | 1          |
|    total_timesteps    | 1000       |
| train/                |            |
|    entropy_loss       | -42.5      |
|    explained_variance | 0.057      |
|    learning_rate      |

In [77]:
# Save the trained A2C agent (skipped when A2C is disabled).
if if_using_a2c:
    trained_a2c.save(TRAINED_MODEL_DIR + "/agent_a2c")

### Agent 2: DDPG


In [78]:
# Create the DDPG model with the library's default hyperparameters.
agent = DRLAgent(env=env_train)
model_ddpg = agent.get_model("ddpg")

if if_using_ddpg:
    # Send training logs to stdout, a CSV file, and TensorBoard under the
    # results directory.
    tmp_path = RESULTS_DIR + '/ddpg'
    new_logger_ddpg = configure(tmp_path, ["stdout", "csv", "tensorboard"])
    model_ddpg.set_logger(new_logger_ddpg)


{'batch_size': 128, 'buffer_size': 50000, 'learning_rate': 0.001}
Using cpu device
Logging to results/ddpg


In [79]:
# Train DDPG only when it is enabled; otherwise leave a None placeholder.
if if_using_ddpg:
    trained_ddpg = agent.train_model(
        model=model_ddpg,
        tb_log_name='ddpg',
        total_timesteps=50000,
    )
else:
    trained_ddpg = None

----------------------------------
| time/              |           |
|    episodes        | 4         |
|    fps             | 164       |
|    time_elapsed    | 15        |
|    total_timesteps | 2508      |
| train/             |           |
|    actor_loss      | -1.05     |
|    critic_loss     | 18.6      |
|    learning_rate   | 0.001     |
|    n_updates       | 2407      |
|    reward          | 0.9440603 |
----------------------------------
----------------------------------
| time/              |           |
|    episodes        | 8         |
|    fps             | 165       |
|    time_elapsed    | 30        |
|    total_timesteps | 5016      |
| train/             |           |
|    actor_loss      | 5.22      |
|    critic_loss     | 1.31      |
|    learning_rate   | 0.001     |
|    n_updates       | 4915      |
|    reward          | 0.9440603 |
----------------------------------
day: 626, episode: 90
begin_total_asset: 1000000.00
end_total_asset: 1181349.13
total_rewa

In [80]:
# Save the trained DDPG agent (skipped when DDPG is disabled).
if if_using_ddpg:
    trained_ddpg.save(TRAINED_MODEL_DIR + "/agent_ddpg")

### Agent 3: PPO

In [81]:
# Create the PPO model with explicitly chosen hyperparameters.
agent = DRLAgent(env=env_train)
PPO_PARAMS = dict(
    n_steps=2048,
    ent_coef=0.01,
    learning_rate=0.00025,
    batch_size=128,
)
model_ppo = agent.get_model("ppo", model_kwargs=PPO_PARAMS)

if if_using_ppo:
    # Send training logs to stdout, a CSV file, and TensorBoard under the
    # results directory.
    tmp_path = RESULTS_DIR + '/ppo'
    new_logger_ppo = configure(tmp_path, ["stdout", "csv", "tensorboard"])
    model_ppo.set_logger(new_logger_ppo)

{'n_steps': 2048, 'ent_coef': 0.01, 'learning_rate': 0.00025, 'batch_size': 128}
Using cpu device
Logging to results/ppo


In [82]:
# Train PPO only when it is enabled; otherwise leave a None placeholder.
if if_using_ppo:
    trained_ppo = agent.train_model(
        model=model_ppo,
        tb_log_name='ppo',
        total_timesteps=200000,
    )
else:
    trained_ppo = None

-------------------------------------
| time/              |              |
|    fps             | 703          |
|    iterations      | 1            |
|    time_elapsed    | 2            |
|    total_timesteps | 2048         |
| train/             |              |
|    reward          | -0.055938117 |
-------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 662         |
|    iterations           | 2           |
|    time_elapsed         | 6           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.013933469 |
|    clip_fraction        | 0.176       |
|    clip_range           | 0.2         |
|    entropy_loss         | -42.6       |
|    explained_variance   | -0.117      |
|    learning_rate        | 0.00025     |
|    loss                 | 3.11        |
|    n_updates            | 10          |
|    policy_gradient_loss | 

In [83]:
# Save the trained PPO agent (skipped when PPO is disabled).
if if_using_ppo:
    trained_ppo.save(TRAINED_MODEL_DIR + "/agent_ppo")

### Agent 4: TD3

In [84]:
# Create the TD3 model with explicitly chosen hyperparameters.
agent = DRLAgent(env=env_train)
TD3_PARAMS = dict(
    batch_size=100,
    buffer_size=1000000,
    learning_rate=0.001,
)

model_td3 = agent.get_model("td3", model_kwargs=TD3_PARAMS)

if if_using_td3:
    # Send training logs to stdout, a CSV file, and TensorBoard under the
    # results directory.
    tmp_path = RESULTS_DIR + '/td3'
    new_logger_td3 = configure(tmp_path, ["stdout", "csv", "tensorboard"])
    model_td3.set_logger(new_logger_td3)

{'batch_size': 100, 'buffer_size': 1000000, 'learning_rate': 0.001}
Using cpu device
Logging to results/td3


In [85]:
# Train TD3 only when it is enabled; otherwise leave a None placeholder.
if if_using_td3:
    trained_td3 = agent.train_model(
        model=model_td3,
        tb_log_name='td3',
        total_timesteps=50000,
    )
else:
    trained_td3 = None

----------------------------------
| time/              |           |
|    episodes        | 4         |
|    fps             | 165       |
|    time_elapsed    | 15        |
|    total_timesteps | 2508      |
| train/             |           |
|    actor_loss      | 45.3      |
|    critic_loss     | 2.67e+04  |
|    learning_rate   | 0.001     |
|    n_updates       | 2407      |
|    reward          | 0.9769124 |
----------------------------------
day: 626, episode: 490
begin_total_asset: 1000000.00
end_total_asset: 1193210.46
total_reward: 193210.46
total_cost: 999.00
total_trades: 13146
Sharpe: 0.491
----------------------------------
| time/              |           |
|    episodes        | 8         |
|    fps             | 165       |
|    time_elapsed    | 30        |
|    total_timesteps | 5016      |
| train/             |           |
|    actor_loss      | 29.8      |
|    critic_loss     | 2.39      |
|    learning_rate   | 0.001     |
|    n_updates       | 4915      |
| 

In [86]:
# Save the trained TD3 agent (skipped when TD3 is disabled).
if if_using_td3:
    trained_td3.save(TRAINED_MODEL_DIR + "/agent_td3")

### Agent 5: SAC

In [87]:
# Create the SAC model with explicitly chosen hyperparameters.
agent = DRLAgent(env=env_train)
SAC_PARAMS = dict(
    batch_size=128,
    buffer_size=100000,
    learning_rate=0.0001,
    learning_starts=100,
    ent_coef="auto_0.1",
)

model_sac = agent.get_model("sac", model_kwargs=SAC_PARAMS)

if if_using_sac:
    # Send training logs to stdout, a CSV file, and TensorBoard under the
    # results directory.
    tmp_path = RESULTS_DIR + '/sac'
    new_logger_sac = configure(tmp_path, ["stdout", "csv", "tensorboard"])
    model_sac.set_logger(new_logger_sac)

{'batch_size': 128, 'buffer_size': 100000, 'learning_rate': 0.0001, 'learning_starts': 100, 'ent_coef': 'auto_0.1'}
Using cpu device
Logging to results/sac


In [88]:
# Train SAC only when it is enabled; otherwise leave a None placeholder.
if if_using_sac:
    trained_sac = agent.train_model(
        model=model_sac,
        tb_log_name='sac',
        total_timesteps=70000,
    )
else:
    trained_sac = None

----------------------------------
| time/              |           |
|    episodes        | 4         |
|    fps             | 119       |
|    time_elapsed    | 20        |
|    total_timesteps | 2508      |
| train/             |           |
|    actor_loss      | 55.4      |
|    critic_loss     | 63.9      |
|    ent_coef        | 0.097     |
|    ent_coef_loss   | -117      |
|    learning_rate   | 0.0001    |
|    n_updates       | 2407      |
|    reward          | 1.0255969 |
----------------------------------
day: 626, episode: 570
begin_total_asset: 1000000.00
end_total_asset: 1096255.39
total_reward: 96255.39
total_cost: 76078.19
total_trades: 16380
Sharpe: 0.294
----------------------------------
| time/              |           |
|    episodes        | 8         |
|    fps             | 119       |
|    time_elapsed    | 42        |
|    total_timesteps | 5016      |
| train/             |           |
|    actor_loss      | 40.9      |
|    critic_loss     | 625       |
|

In [89]:
# Save the trained SAC agent (skipped when SAC is disabled).
if if_using_sac:
    trained_sac.save(TRAINED_MODEL_DIR + "/agent_sac")

## Save the trained agent
Trained agents should have already been saved in the "trained_models" directory after you run the code blocks above.


# Part 2. Backtesting