In [115]:
# ## install finrl library
# !pip install wrds
# !pip install swig
# !pip install -q condacolab
# import condacolab
# condacolab.install()
# !apt-get update -y -qq && apt-get install -y -qq cmake libopenmpi-dev python3-dev zlib1g-dev libgl1-mesa-glx swig
# !pip install git+https://github.com/flpymonkey/FinRL-Fork.git

In [116]:
import warnings
warnings.filterwarnings("ignore")

In [117]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
# matplotlib.use('Agg')
import datetime

%matplotlib inline
from finrl.config_tickers import DOW_30_TICKER
from finrl.meta.preprocessor.yahoodownloader import YahooDownloader
from finrl.meta.preprocessor.preprocessors import FeatureEngineer, data_split
from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv
from finrl.agents.stablebaselines3.models import DRLAgent,DRLEnsembleAgent

from finrl.plot import backtest_stats, backtest_plot, get_daily_return, get_baseline

from pprint import pprint

# import sys
# sys.path.append("../FinRL-Library")

import itertools

In [118]:
import os
from finrl.main import check_and_make_directories
from finrl.config import (
    DATA_SAVE_DIR,
    TRAINED_MODEL_DIR,
    TENSORBOARD_LOG_DIR,
    RESULTS_DIR,
    INDICATORS,
    TRAIN_START_DATE,
    TRAIN_END_DATE,
    TEST_START_DATE,
    TEST_END_DATE,
    TRADE_START_DATE,
    TRADE_END_DATE,
)

# Hand the data, trained-model, TensorBoard-log and results directories to
# FinRL's directory helper (presumably creates any that are missing —
# confirm in finrl.main).
check_and_make_directories(
    [
        DATA_SAVE_DIR,
        TRAINED_MODEL_DIR,
        TENSORBOARD_LOG_DIR,
        RESULTS_DIR,
    ]
)

In [119]:
# TODO: remove DOW from the training universe; it is missing some values in 2019.
# Show the full default ticker list for reference.
print(DOW_30_TICKER)

['AXP', 'AMGN', 'AAPL', 'BA', 'CAT', 'CSCO', 'CVX', 'GS', 'HD', 'HON', 'IBM', 'INTC', 'JNJ', 'KO', 'JPM', 'MCD', 'MMM', 'MRK', 'MSFT', 'NKE', 'PG', 'TRV', 'UNH', 'CRM', 'VZ', 'V', 'WBA', 'WMT', 'DIS', 'DOW']


In [120]:
# Date ranges for the experiment. These override the defaults imported from
# finrl.config; the test window starts on the day the training window ends.
TRAIN_START_DATE, TRAIN_END_DATE = '2009-04-01', '2021-01-01'
TEST_START_DATE, TEST_END_DATE = '2021-01-01', '2024-01-01'

from finrl.meta.preprocessor.yahoodownloader import YahooDownloader
from finrl.config_tickers import DOW_30_TICKER


# TODO Drop the DOW stock
value_to_remove = "DOW"
# Rebuild the ticker list, keeping every symbol except the excluded one.
DOW_30_TICKER = list(filter(lambda ticker: ticker != value_to_remove, DOW_30_TICKER))
print(DOW_30_TICKER)

# Experiment constants.
TIME_WINDOW = 40
COMMISSION_FEE_PERCENT = 0.001
INITIAL_CASH = 1_000_000


# Download price history spanning both the training and the test window.
df = YahooDownloader(
    start_date=TRAIN_START_DATE,
    end_date=TEST_END_DATE,
    ticker_list=DOW_30_TICKER,
).fetch_data()

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

['AXP', 'AMGN', 'AAPL', 'BA', 'CAT', 'CSCO', 'CVX', 'GS', 'HD', 'HON', 'IBM', 'INTC', 'JNJ', 'KO', 'JPM', 'MCD', 'MMM', 'MRK', 'MSFT', 'NKE', 'PG', 'TRV', 'UNH', 'CRM', 'VZ', 'V', 'WBA', 'WMT', 'DIS']



[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%*******

Shape of DataFrame:  (107677, 8)


In [121]:
# Technical indicators to compute for every ticker. This list replaces the
# INDICATORS default imported from finrl.config.
INDICATORS = [
    'macd',
    'rsi_30',
    'cci_30',
    'dx_30',
]

# Raw price/volume columns followed by the indicator columns.
# Built by concatenation: list.extend() mutates in place and returns None, so
# `[...].extend(INDICATORS)` would leave FEATURES set to None.
FEATURES = ["close", "high", "low", "volume"] + INDICATORS

In [122]:
from finrl.meta.preprocessor.preprocessors import FeatureEngineer
# Configure the feature engineer to add the technical indicators listed in
# INDICATORS and the turbulence index; no user-defined features.
fe = FeatureEngineer(
    use_technical_indicator=True,
    tech_indicator_list=INDICATORS,
    use_turbulence=True,
    user_defined_feature=False,
)

processed = fe.preprocess_data(df)
processed = processed.copy()
# Zero out missing values first, then both infinities. Replacing only +inf
# would let any -inf produced by an indicator reach the environment.
processed = processed.fillna(0)
processed = processed.replace([np.inf, -np.inf], 0)

# here you can see the engineered features are added to each stock day
print(processed)


Successfully added technical indicators
Successfully added turbulence index
              date        open        high         low       close     volume  \
0       2009-04-01    3.717500    3.892857    3.710357    3.278072  589372000   
1       2009-04-01   48.779999   48.930000   47.099998   34.259624   10850100   
2       2009-04-01   13.340000   14.640000   13.080000   11.463931   27701800   
3       2009-04-01   34.520000   35.599998   34.209999   26.850754    9288800   
4       2009-04-01   27.500000   29.520000   27.440001   19.116180   15308300   
...            ...         ...         ...         ...         ...        ...   
107672  2023-12-29  525.979980  528.239990  523.919983  520.342346    2080100   
107673  2023-12-29  260.570007  261.459991  259.670013  258.866302    4074600   
107674  2023-12-29   37.380001   37.759998   37.320000   35.344772   16205200   
107675  2023-12-29   26.440001   26.629999   25.750000   24.868887   10853300   
107676  2023-12-29   52.509998   

In [123]:
# Number of distinct tickers in the processed frame.
stock_dimension = len(processed["tic"].unique())
# State vector size: 1 scalar slot, 2 slots per ticker, and one slot per
# (indicator, ticker) pair — presumably cash, prices and holdings; confirm
# against StockTradingEnv.
state_space = 1 + stock_dimension * (2 + len(INDICATORS))
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

Stock Dimension: 29, State Space: 175


In [124]:

# Partition the processed data into the training window and the test window.
train = data_split(processed, TRAIN_START_DATE, TRAIN_END_DATE)
test = data_split(processed, TEST_START_DATE, TEST_END_DATE)

# Report the row count of each partition, one per line.
print(len(train), len(test), sep="\n")

# Persist both partitions so later cells can reload them from disk.
train.to_csv('train_data.csv')
test.to_csv('test_data.csv')

85840
21837


In [125]:
import pandas as pd
from stable_baselines3.common.logger import configure

from finrl.agents.stablebaselines3.models import DRLAgent
from finrl.config import TRAINED_MODEL_DIR, RESULTS_DIR
from finrl.main import check_and_make_directories
from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv

# Pass the trained-model directory to FinRL's directory helper
# (presumably creates it if missing — confirm in finrl.main).
check_and_make_directories([TRAINED_MODEL_DIR])

In [126]:
# Train the data: reload the training partition from its CSV file.

# If the data was not produced by the split step of this notebook, make sure
# its columns and index are already in the form the environment expects; in
# that case the index handling below can be commented out and skipped.
train = pd.read_csv('train_data.csv')
# The first CSV column holds the saved index: restore it and give it an empty name.
train = train.set_index(train.columns[0]).rename_axis('')

In [127]:
# Number of distinct tickers in the training partition.
stock_dimension = len(train["tic"].unique())
# State vector size: 1 scalar slot, 2 slots per ticker, and one slot per
# (indicator, ticker) pair — presumably cash, prices and holdings; confirm
# against StockTradingEnv.
state_space = 1 + stock_dimension * (2 + len(INDICATORS))
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

Stock Dimension: 29, State Space: 175


In [128]:
# Per-ticker transaction costs. The two lists are built independently: a
# chained assignment (`a = b = [...]`) would bind both names to one list
# object, so an in-place change to one would silently change the other.
buy_cost_list = [COMMISSION_FEE_PERCENT] * stock_dimension
sell_cost_list = [COMMISSION_FEE_PERCENT] * stock_dimension
# Start with zero shares held in every ticker.
num_stock_shares = [0] * stock_dimension

# Keyword arguments forwarded to StockTradingEnv.
env_kwargs = {
    "hmax": 100,
    "initial_amount": INITIAL_CASH,
    "num_stock_shares": num_stock_shares,
    "buy_cost_pct": buy_cost_list,
    "sell_cost_pct": sell_cost_list,
    "state_space": state_space,
    "stock_dim": stock_dimension,
    "tech_indicator_list": INDICATORS,
    "action_space": stock_dimension,
    "reward_scaling": 1e-4
}


# Training environment built on the training partition.
e_train_gym = StockTradingEnv(df = train, **env_kwargs)

In [129]:
# get_sb_env() returns a pair; keep the first element (the environment used
# for training) and discard the second.
env_train, _ = e_train_gym.get_sb_env()
# Show which environment wrapper type was returned.
print(type(env_train))

<class 'stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv'>


In [130]:
# Train the PPO agent.
# Stable-Baselines3 PPO reference:
#   https://stable-baselines3.readthedocs.io/en/master/modules/ppo.html

agent = DRLAgent(env=env_train)

# Hyperparameters forwarded to the PPO model.
PPO_PARAMS = dict(
    n_steps=2048,
    # ent_coef=0.01,
    ent_coef=0.9,
    learning_rate=0.00025,
    batch_size=128,
    clip_range=0.9,
)

# Observation: a lower clip_range makes the stock curves flatter (a very
# conservative policy).

# TODO try different numbers of epochs (n_epochs)
# TODO tune the entropy term further; make sure the agent explores enough during training
# TODO experiment more with the clip_range parameter


model_ppo = agent.get_model("ppo", model_kwargs=PPO_PARAMS)

# Send this model's logs to stdout, CSV and TensorBoard under the results directory.
tmp_path = f"{RESULTS_DIR}/ppo"
new_logger_ppo = configure(tmp_path, ["stdout", "csv", "tensorboard"])
# Attach the new logger to the model.
model_ppo.set_logger(new_logger_ppo)

{'n_steps': 2048, 'ent_coef': 0.9, 'learning_rate': 0.00025, 'batch_size': 128, 'clip_range': 0.9}
Using cpu device
Logging to results/ppo


In [131]:
# Train the PPO model for 90,000 timesteps under the TensorBoard log name 'ppo'.
# NOTE(review): the recorded output of this cell is a stream of
# "Original Error: 'obs'" / "Inner Error: 'reward'" lines — presumably emitted
# by a logging callback inside train_model; confirm that training metrics are
# actually being recorded.
trained_ppo = agent.train_model(model=model_ppo, 
                             tb_log_name='ppo',
                             total_timesteps=90_000)

Original Error: 'obs'
Inner Error: 'reward'
Original Error: 'obs'
Inner Error: 'reward'
Original Error: 'obs'
Inner Error: 'reward'
Original Error: 'obs'
Inner Error: 'reward'
Original Error: 'obs'
Inner Error: 'reward'
Original Error: 'obs'
Inner Error: 'reward'
Original Error: 'obs'
Inner Error: 'reward'
Original Error: 'obs'
Inner Error: 'reward'
Original Error: 'obs'
Inner Error: 'reward'
Original Error: 'obs'
Inner Error: 'reward'
Original Error: 'obs'
Inner Error: 'reward'
Original Error: 'obs'
Inner Error: 'reward'
Original Error: 'obs'
Inner Error: 'reward'
Original Error: 'obs'
Inner Error: 'reward'
Original Error: 'obs'
Inner Error: 'reward'
Original Error: 'obs'
Inner Error: 'reward'
Original Error: 'obs'
Inner Error: 'reward'
Original Error: 'obs'
Inner Error: 'reward'
Original Error: 'obs'
Inner Error: 'reward'
Original Error: 'obs'
Inner Error: 'reward'
Original Error: 'obs'
Inner Error: 'reward'
Original Error: 'obs'
Inner Error: 'reward'
Original Error: 'obs'
Inner Erro

In [132]:
# Persist the trained PPO model under the trained-models directory.
trained_ppo.save(f"{TRAINED_MODEL_DIR}/agent_ppo")

In [133]:
# Backtest the results: reload both partitions from their CSV files.

train = pd.read_csv('train_data.csv')
test = pd.read_csv('test_data.csv')

# If the data was not produced by the split step of this notebook, make sure
# its columns and index are already in the form the environment expects; in
# that case the index handling below can be commented out and skipped.
# The first CSV column holds the saved index: restore it and give it an empty name.
train = train.set_index(train.columns[0]).rename_axis('')
test = test.set_index(test.columns[0]).rename_axis('')

In [134]:
from stable_baselines3 import PPO
from finrl.config import TRAINED_MODEL_DIR

# Reload the saved PPO model from the trained-models directory.
trained_ppo = PPO.load(f"{TRAINED_MODEL_DIR}/agent_ppo")

# Here is how to load the model