In [1]:
import sys
sys.path.append("./FinRL-Library")

import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
matplotlib.use('Agg')
import datetime

from finrl.config import config
from finrl.marketdata.yahoodownloader import YahooDownloader
from finrl.preprocessing.preprocessors import FeatureEngineer
from finrl.preprocessing.data import data_split
from finrl.env.env_stocktrading import StockTradingEnv
from finrl.model.models import DRLAgent
from datetime import datetime,timedelta


<a id='1.4'></a>
## 2.4. Create Folders

In [2]:
import os

# Ensure the directory named by config.TENSORBOARD_LOG_DIR exists before any
# model is trained. exist_ok=True keeps the cell safe to re-run and removes the
# check-then-create race of a separate os.path.exists() test; it still raises
# FileExistsError if the path exists but is not a directory.
os.makedirs("./" + config.TENSORBOARD_LOG_DIR, exist_ok=True)


In [3]:
# Column names are supplied explicitly through `names=` when reading the CSV.
col_names = ['open', 'high', 'low', 'close']
raw_prices = pd.read_csv('training.csv', names=col_names)

# The file carries neither a ticker nor dates: label every row 'IBM' and attach
# one synthetic calendar day per row, counting up from config.START_DATE.
base = datetime.strptime(config.START_DATE, "%Y-%m-%d")
date = [base + timedelta(days=day_offset) for day_offset in range(len(raw_prices))]
data_df = raw_prices.assign(tic='IBM', date=date)

In [4]:
# Preview the first rows to check the price columns, the ticker label and the synthetic dates.
data_df.head()

Unnamed: 0,open,high,low,close,tic,date
0,186.73,188.71,186.0,186.3,IBM,1990-01-01
1,185.57,186.33,184.94,185.54,IBM,1990-01-02
2,184.81,185.03,183.1,184.66,IBM,1990-01-03
3,184.39,184.48,182.31,182.54,IBM,1990-01-04
4,182.2,182.27,180.27,181.59,IBM,1990-01-05


In [5]:
## user can add more technical indicators
## check https://github.com/jealous/stockstats for different names
# Start from the indicator list in the FinRL config and append the extra
# stockstats names; `+` builds a new list, so the config list is not mutated.
extra_indicators = ['kdjk','open_2_sma','boll','close_10.0_le_5_c','wr_10','dma','trix']
tech_indicator_list = config.TECHNICAL_INDICATORS_LIST + extra_indicators
print(tech_indicator_list)

['macd', 'boll_ub', 'boll_lb', 'rsi_30', 'cci_30', 'dx_30', 'close_30_sma', 'close_60_sma', 'kdjk', 'open_2_sma', 'boll', 'close_10.0_le_5_c', 'wr_10', 'dma', 'trix']


<a id='3.2'></a>
## 4.2 Perform Feature Engineering

In [6]:
# Feature engineer configured for technical indicators only: the turbulence
# and user-defined-feature options are switched off.
feature_engineer_kwargs = dict(
    use_technical_indicator=True,
    tech_indicator_list=tech_indicator_list,
    use_turbulence=False,
    user_defined_feature=False,
)
fe = FeatureEngineer(**feature_engineer_kwargs)

# NOTE: data_df is rebound to the processed frame, so re-running this cell
# alone feeds already-processed data back into preprocess_data.
data_df = fe.preprocess_data(data_df)

# The whole processed frame is used for training (no split is made here).
train = data_df

Successfully added technical indicators


In [7]:
# Preview the processed frame: the indicator columns should now appear next to the raw price columns.
data_df.head()

Unnamed: 0,open,high,low,close,tic,date,macd,boll_ub,boll_lb,rsi_30,...,dx_30,close_30_sma,close_60_sma,kdjk,open_2_sma,boll,close_10.0_le_5_c,wr_10,dma,trix
0,186.73,188.71,186.0,186.3,IBM,1990-01-01,0.0,186.994802,184.845198,0.0,...,100.0,186.3,186.3,37.02337,186.73,186.3,0.0,88.929889,0.0,-0.064833
1,185.57,186.33,184.94,185.54,IBM,1990-01-02,-0.017051,186.994802,184.845198,0.0,...,100.0,185.92,185.92,29.987287,186.15,185.92,0.0,84.084881,0.0,-0.064833
2,184.81,185.03,183.1,184.66,IBM,1990-01-03,-0.049015,187.141463,183.858537,0.0,...,100.0,185.5,185.5,29.260687,185.19,185.5,0.0,72.192513,0.0,-0.07609
3,184.39,184.48,182.31,182.54,IBM,1990-01-04,-0.138894,188.009287,181.510713,0.0,...,100.0,184.76,184.76,20.705041,184.6,184.76,0.0,96.40625,0.0,-0.10787
4,182.2,182.27,180.27,181.59,IBM,1990-01-05,-0.222611,188.120686,180.131314,0.0,...,100.0,184.126,184.126,19.016631,183.295,184.126,0.0,84.36019,0.0,-0.130739


In [8]:
# Single-ticker setup. The state size is one leading slot, plus two slots per
# stock, plus one slot per technical indicator per stock.
# (presumably cash balance, then price and holdings per stock -- confirm
# against StockTradingEnv)
stock_dimension = 1
per_stock_features = 2 + len(tech_indicator_list)
state_space = 1 + per_stock_features * stock_dimension
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")


Stock Dimension: 1, State Space: 18


In [9]:
# Environment settings kept in one dict so they can be unpacked into every
# StockTradingEnv built in this notebook. Both cost percentages are zero and
# the action space has one entry per stock.
env_kwargs = dict(
    hmax=1,
    initial_amount=10000,
    buy_cost_pct=0,
    sell_cost_pct=0,
    state_space=state_space,
    stock_dim=stock_dimension,
    tech_indicator_list=tech_indicator_list,
    action_space=stock_dimension,
    reward_scaling=1e-4,
)

# Training environment over the processed training frame.
e_train_gym = StockTradingEnv(df=train, **env_kwargs)

In [10]:
# get_sb_env() returns a pair; only the first item (the environment wrapper the
# agents train on) is needed, the second is discarded.
sb_env_result = e_train_gym.get_sb_env()
env_train, _ = sb_env_result
print(type(env_train))

<class 'stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv'>


### Model 1: A2C

In [11]:

# One DRLAgent bound to the training environment; later model cells reuse it.
agent = DRLAgent(env=env_train)

# A2C hyper-parameters handed to the model factory.
A2C_PARAMS = dict(n_steps=5, ent_coef=0.005, learning_rate=0.0002)
model_a2c = agent.get_model(model_name="a2c", model_kwargs=A2C_PARAMS)

# Train for 50k timesteps, logging under the 'a2c' TensorBoard run name.
a2c_training_args = dict(
    model=model_a2c,
    tb_log_name='a2c',
    total_timesteps=50000,
)
trained_a2c = agent.train_model(**a2c_training_args)


{'n_steps': 5, 'ent_coef': 0.005, 'learning_rate': 0.0002}
Using cuda device
Logging to tensorboard_log/a2c/a2c_25
-------------------------------------
| time/                 |           |
|    fps                | 37        |
|    iterations         | 100       |
|    time_elapsed       | 13        |
|    total_timesteps    | 500       |
| train/                |           |
|    entropy_loss       | -1.43     |
|    explained_variance | -1.19e-07 |
|    learning_rate      | 0.0002    |
|    n_updates          | 99        |
|    policy_loss        | 5.08e-05  |
|    std                | 1.02      |
|    value_loss         | 2.99e-09  |
-------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 68        |
|    iterations         | 200       |
|    time_elapsed       | 14        |
|    total_timesteps    | 1000      |
| train/                |           |
|    entropy_loss       | -1.45     |
|    explai

### Model 2: PPO

In [12]:

# PPO hyper-parameters handed to the model factory.
PPO_PARAMS = dict(
    n_steps=2048,
    ent_coef=0.005,
    learning_rate=0.0001,
    batch_size=128,
)
model_ppo = agent.get_model("ppo", model_kwargs=PPO_PARAMS)

# Train for 60k timesteps, logging under the 'ppo' TensorBoard run name.
ppo_training_args = dict(
    model=model_ppo,
    tb_log_name='ppo',
    total_timesteps=60000,
)
trained_ppo = agent.train_model(**ppo_training_args)


{'n_steps': 2048, 'ent_coef': 0.005, 'learning_rate': 0.0001, 'batch_size': 128}
Using cuda device
Logging to tensorboard_log/ppo/ppo_19
----------------------------------
| environment/        |          |
|    portfolio_value  | 1.01e+04 |
|    total_cost       | 0        |
|    total_reward     | 67.2     |
|    total_reward_pct | 0.672    |
|    total_trades     | 306      |
| time/               |          |
|    fps              | 615      |
|    iterations       | 1        |
|    time_elapsed     | 3        |
|    total_timesteps  | 2048     |
----------------------------------
------------------------------------------
| environment/            |              |
|    portfolio_value      | 9.97e+03     |
|    total_cost           | 0            |
|    total_reward         | -33.8        |
|    total_reward_pct     | -0.338       |
|    total_trades         | 314          |
| time/                   |              |
|    fps                  | 546          |
|    iterations      

In [16]:
# Build the trading frame the same way as the training frame: raw OHLC rows,
# a constant ticker, and one synthetic calendar day per row starting at
# config.START_TRADE_DATE, then the same feature-engineering pass.
trade = pd.read_csv('testing.csv', names=col_names)
trade['tic'] = 'IBM'
base = datetime.strptime(config.START_TRADE_DATE, "%Y-%m-%d")
date = [base + timedelta(days=x) for x in range(len(trade))]
trade['date'] = date
trade = fe.preprocess_data(trade)

e_trade_gym = StockTradingEnv(df=trade, **env_kwargs)

# Compare against the capital the environment is actually configured with,
# rather than a second hard-coded copy of the number.
initial_amount = env_kwargs["initial_amount"]
n_eval_runs = 10

# max_profit holds the best *final account value* seen so far (not a profit
# delta). Starting from -inf guarantees the first finite result is recorded,
# so `actions` is only left as None if no run produced a comparable value.
max_profit = float("-inf")
actions = None
negative_trade = 0  # number of runs that ended below the starting capital

# NOTE(review): the run to export is chosen by its result on the trading data
# itself, so the reported best value is an optimistic estimate -- confirm this
# selection is intended.
for _ in range(n_eval_runs):
    df_account_value, df_actions = DRLAgent.DRL_prediction(model=trained_ppo, environment=e_trade_gym)
    final_profit = df_account_value['account_value'].iloc[-1]
    if final_profit < initial_amount:
        negative_trade += 1
    if final_profit > max_profit:
        max_profit = final_profit
        actions = pd.DataFrame(np.array(df_actions['actions'], dtype='int'))

print(actions)
print(max_profit)
print(negative_trade)

# Fail with a clear message instead of an AttributeError on None.
if actions is None:
    raise RuntimeError("No prediction run produced a valid final account value; nothing to export.")
actions.to_csv("action.csv", index=False, header=False)


Successfully added technical indicators
    0
0   0
1  -1
2   0
3   1
4   0
5   0
6  -1
7   0
8   0
9   0
10  0
11  0
12  0
13  0
14  0
15  0
16  0
17  0
18  0
10004.349999999999
3
