In [9]:
import sys
sys.path.append("./FinRL-Library")

import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
matplotlib.use('Agg')
import datetime

from finrl.config import config
from finrl.marketdata.yahoodownloader import YahooDownloader
from finrl.preprocessing.preprocessors import FeatureEngineer
from finrl.preprocessing.data import data_split
from finrl.env.env_stocktrading import StockTradingEnv
from finrl.model.models import DRLAgent
from datetime import datetime,timedelta


<a id='1.4'></a>
## 2.4. Create Folders

In [11]:
import os

# Make sure the TensorBoard log directory and the results directory exist
# (paths are taken relative to the current working directory).
# exist_ok=True keeps the cell safe to re-run and removes the separate
# "exists?" check, which could race with another process creating the folder.
os.makedirs("./" + config.TENSORBOARD_LOG_DIR, exist_ok=True)
os.makedirs("./" + config.RESULTS_DIR, exist_ok=True)


In [13]:
# The CSV has no header row, so the OHLC column names are supplied explicitly.
col_names = ['open', 'high', 'low', 'close']
data_df = pd.read_csv('training.csv', names=col_names)

# The file carries no timestamps: synthesize one consecutive calendar day per
# row, starting at config.START_DATE.
base = datetime.strptime(config.START_DATE, "%Y-%m-%d")
date = [base + timedelta(days=offset) for offset in range(len(data_df))]

# Append the constant ticker symbol and the generated dates (in that column order).
data_df = data_df.assign(tic='IBM', date=date)

In [15]:
# Preview the first rows: the four OHLC columns plus the added 'tic' and 'date' columns.
data_df.head()

Unnamed: 0,open,high,low,close,tic,date
0,186.73,188.71,186.0,186.3,IBM,1990-01-01
1,185.57,186.33,184.94,185.54,IBM,1990-01-02
2,184.81,185.03,183.1,184.66,IBM,1990-01-03
3,184.39,184.48,182.31,182.54,IBM,1990-01-04
4,182.2,182.27,180.27,181.59,IBM,1990-01-05


In [17]:
# Technical indicators to compute for each row. More can be added; see
# https://github.com/jealous/stockstats for the accepted indicator names.
# Alternative kept for reference: tech_indicator_list = config.TECHNICAL_INDICATORS_LIST
tech_indicator_list = [
    'macd',
    'macds',
    'macdh',
    'kdjk',
    'kdjd',
    'close_5_sma',
    'close_10_sma',
    'close_20_sma',
    'close_60_sma',
]

<a id='3.2'></a>
## 4.2 Perform Feature Engineering

In [19]:
# Configure the preprocessor: only the listed technical indicators are enabled;
# the turbulence feature and user-defined features are switched off.
fe = FeatureEngineer(
    use_technical_indicator=True,
    tech_indicator_list=tech_indicator_list,
    use_turbulence=False,
    user_defined_feature=False,
)

# Rebind data_df to the processed frame and expose the same object as the
# training set.
train = data_df = fe.preprocess_data(data_df)

Successfully added technical indicators


In [20]:
# Inspect the last rows to check the technical-indicator columns added by preprocessing.
train.tail()

Unnamed: 0,open,high,low,close,tic,date,macd,macds,macdh,kdjk,kdjd,close_5_sma,close_10_sma,close_20_sma,close_60_sma
1483,151.95,152.2,151.33,151.84,IBM,1994-01-23,-0.021715,-0.040454,0.018738,77.546456,60.081346,151.008,149.771,151.1415,148.879
1484,152.06,152.49,151.62,151.98,IBM,1994-01-24,0.07794,-0.016775,0.094715,82.323964,67.495552,151.61,150.053,151.0565,149.028167
1485,152.35,152.93,151.7,152.47,IBM,1994-01-25,0.194217,0.025423,0.168793,85.93423,73.641778,152.002,150.46,150.962,149.168
1486,152.81,153.61,152.17,153.55,IBM,1994-01-26,0.369258,0.09419,0.275067,90.295488,79.193015,152.322,150.926,150.9365,149.343167
1487,153.65,154.41,153.08,153.97,IBM,1994-01-27,0.535694,0.182491,0.353203,90.90189,83.095973,152.762,151.613,150.9335,149.512333


In [21]:
# Only one ticker is traded in this notebook.
stock_dimension = 1

# Observation length: one leading scalar, two entries per stock, and one entry
# per technical indicator per stock.
# NOTE(review): presumably cash balance + price + holdings per stock — confirm
# against StockTradingEnv.
state_space = 1 + stock_dimension * (2 + len(tech_indicator_list))
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")


Stock Dimension: 1, State Space: 12


In [22]:
# Keyword arguments shared by the training and the trading environments.
env_kwargs = {
    "hmax": 1,
    "initial_amount": 100000,
    # Both transaction-cost rates are set to zero for this experiment.
    "buy_cost_pct": 0,
    "sell_cost_pct": 0,
    "state_space": state_space,
    "stock_dim": stock_dimension,
    "tech_indicator_list": tech_indicator_list,
    # One action component per traded stock.
    "action_space": stock_dimension,
    "reward_scaling": 1e-5,
}

# Training environment built on the preprocessed training frame.
e_train_gym = StockTradingEnv(df=train, **env_kwargs)

In [23]:
# Obtain the Stable-Baselines3 environment from the gym environment; the second
# return value is not used in this notebook.
env_train, _ = e_train_gym.get_sb_env()
# Show the wrapper type (the recorded output is a DummyVecEnv).
print(type(env_train))
# Agent helper bound to the training environment.
agent = DRLAgent(env = env_train)


<class 'stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv'>


### Model 1: A2C

In [24]:
# Disabled A2C experiment, kept for reference only. The code below sits inside a
# string literal, so nothing is trained; because the literal is the cell's last
# expression, the notebook echoes its repr as the cell output.
'''
agent = DRLAgent(env = env_train)

A2C_PARAMS = {"n_steps": 5, "ent_coef": 0.005, "learning_rate": 0.0002}
model_a2c = agent.get_model(model_name="a2c",model_kwargs = A2C_PARAMS)

trained_a2c = agent.train_model(model=model_a2c, 
                                tb_log_name='a2c',
                                total_timesteps=50000)
'''

'\nagent = DRLAgent(env = env_train)\n\nA2C_PARAMS = {"n_steps": 5, "ent_coef": 0.005, "learning_rate": 0.0002}\nmodel_a2c = agent.get_model(model_name="a2c",model_kwargs = A2C_PARAMS)\n\ntrained_a2c = agent.train_model(model=model_a2c, \n                                tb_log_name=\'a2c\',\n                                total_timesteps=50000)\n'

### Model 2: PPO

In [25]:
# Fresh agent helper on the training environment for the PPO run.
agent = DRLAgent(env=env_train)

# PPO hyper-parameters handed to the model factory (key order kept, since the
# factory prints the dict).
PPO_PARAMS = {
    "n_steps": 2048,
    "ent_coef": 0.005,
    "learning_rate": 0.0001,
    "batch_size": 128,
}
model_ppo = agent.get_model("ppo", model_kwargs=PPO_PARAMS)

# Train for 60k environment steps; TensorBoard logs go under the 'ppo' run name.
trained_ppo = agent.train_model(
    model=model_ppo,
    tb_log_name='ppo',
    total_timesteps=60000,
)


{'n_steps': 2048, 'ent_coef': 0.005, 'learning_rate': 0.0001, 'batch_size': 128}
Using cuda device
Logging to tensorboard_log/ppo/ppo_59
----------------------------------
| environment/        |          |
|    portfolio_value  | 9.99e+04 |
|    total_cost       | 0        |
|    total_reward     | -121     |
|    total_reward_pct | -0.121   |
|    total_trades     | 316      |
| time/               |          |
|    fps              | 638      |
|    iterations       | 1        |
|    time_elapsed     | 3        |
|    total_timesteps  | 2048     |
----------------------------------
------------------------------------------
| environment/            |              |
|    portfolio_value      | 9.99e+04     |
|    total_cost           | 0            |
|    total_reward         | -73.9        |
|    total_reward_pct     | -0.0739      |
|    total_trades         | 308          |
| time/                   |              |
|    fps                  | 566          |
|    iterations      

In [26]:
# Build the out-of-sample frame the same way as the training frame: headerless
# OHLC CSV, a constant ticker, and one synthetic calendar day per row starting
# at config.START_TRADE_DATE.
trade = pd.read_csv('testing.csv', names=col_names)
trade['tic'] = 'IBM'
base = datetime.strptime(config.START_TRADE_DATE, "%Y-%m-%d")
date = [base + timedelta(days=x) for x in range(len(trade))]
trade['date'] = date
# NOTE(review): indicators are computed on the test rows alone, so long-window
# features such as close_60_sma have no training history to warm up on —
# confirm this is intended.
trade = fe.preprocess_data(trade)

# Trading environment shares the exact configuration of the training environment.
e_trade_gym = StockTradingEnv(df=trade, **env_kwargs)

# Not read anywhere in this cell; retained in case other cells rely on it.
max_profit = 0
negative_trade = 0
total_testing_num = 10

# Repeat the rollout several times and count how often the run ends below the
# starting capital. The threshold is read from env_kwargs so it cannot drift
# from the amount the environment was actually created with.
for _ in range(total_testing_num):
    df_account_value, df_actions = DRLAgent.DRL_prediction(model=trained_ppo, environment=e_trade_gym)
    final_profit = df_account_value['account_value'].iloc[-1]
    if final_profit < env_kwargs["initial_amount"]:
        negative_trade += 1

# One more rollout supplies the actions that are reported and written to disk;
# it is separate from the runs counted above.
df_account_value, df_actions = DRLAgent.DRL_prediction(model=trained_ppo, environment=e_trade_gym)
final_profit = df_account_value['account_value'].iloc[-1]  # final account value, not a profit delta
actions = pd.DataFrame(np.array(df_actions['actions'], dtype='int'))
print(actions)
print(final_profit)
# Share of the counted rollouts that finished below the starting capital.
print(f"{(negative_trade/total_testing_num)*100}%")
# Persist the per-step actions as a bare column (no index, no header).
actions.to_csv("output.csv", index=False, header=False)


Successfully added technical indicators
    0
0   0
1   0
2  -1
3   0
4   0
5   0
6   0
7   0
8   0
9   1
10  0
11 -1
12  0
13  0
14  0
15  0
16  1
17 -1
18  0
100001.56000000001
10.0%
