In [None]:
import sys

# Put the parent directory on the import path.
# NOTE(review): presumably so the project packages imported below resolve — confirm.
sys.path.append("../")

In [None]:
import yaml
import pandas as pd

In [None]:
# Load the ticker configuration; later cells read the list under its 'tic' key.
# `safe_load` is used because `yaml.load` without an explicit Loader is rejected by
# PyYAML >= 6 and can construct arbitrary objects on older versions. The `with`
# block guarantees the file handle is closed.
with open('ticlist.yml', 'r') as ticlist_file:
    tic_list = yaml.safe_load(ticlist_file)

In [None]:
# Date window (YYYY-MM-DD strings) handed to the downloader below.
start_date, end_date = '2009-01-01', '2021-08-30'

In [None]:
# CSV path for the raw downloaded history; a later cell skips the save when this is None.
history_df_name = "./hist_px.csv"

In [None]:
# Technical-indicator names passed to the processor when building features.
tech_indicators = ['macd', 'rsi_30', 'cci_30', 'dx_30']

### Data Extraction 

In [None]:
from neo_finrl.data_processors.processor_yahoofinance import YahooFinanceProcessor

In [None]:
# Single processor instance reused below for download, cleaning, feature
# engineering and array conversion.
data_downloader = YahooFinanceProcessor()

In [None]:
# Download price history for every ticker listed under the 'tic' key.
# NOTE(review): '1D' is presumably the bar interval (daily) — confirm against the processor.
stock_history_df = data_downloader.download_data(
    start_date,
    end_date,
    tic_list['tic'],
    '1D',
)

In [None]:
# Persist the raw download when a target path is configured.
# Identity comparison (`is not None`) is the correct None check; `!=` can be
# overridden by the operand's type.
# Note: this snapshot is written before the currency adjustment applied in the
# next cell, so the CSV holds prices as downloaded.
if history_df_name is not None:
    stock_history_df.to_csv(history_df_name, index=False)

In [None]:
# Simple currency hack: scale price columns of '.SI' and '.HK' tickers by a fixed divisor.
# NOTE(review): presumably rough SGD->USD (1.3) and HKD->USD (7.8) rates — confirm.
# Caution: this mutates `stock_history_df` in place, so re-running the cell divides again.
price_cols = ['open', 'high', 'low', 'close', 'adjcp']
fx_divisors = {'.SI': 1.3, '.HK': 7.8}

for suffix, divisor in fx_divisors.items():
    # Row mask is the same for every price column, so build it once per suffix.
    suffix_rows = stock_history_df.tic.str.endswith(suffix)
    for col_i in price_cols:
        stock_history_df.loc[suffix_rows, col_i] /= divisor

### Preprocess data

In [None]:
# Preprocessing pipeline, innermost call first:
#   clean_data -> add_technical_indicator -> add_turbulence
stock_data_df = data_downloader.add_turbulence(
    data_downloader.add_technical_indicator(
        data_downloader.clean_data(stock_history_df),
        tech_indicators,
    )
)

In [None]:
# Cache the processed frame on disk (row index omitted) so it can be reloaded below.
stock_data_df.to_csv("cleaned_stock.csv", index=False)

### Create the trading environment

In [None]:
from neo_finrl.env_stock_trading.env_stock_trading import StockTradingEnv

In [None]:
# Reload the processed data from the CSV written earlier; this replaces the
# in-memory frame with the on-disk copy.
stock_data_df = pd.read_csv("cleaned_stock.csv")

In [None]:
# Convert the processed frame into the three arrays consumed by the environment.
# VIX handling is switched off via `if_vix=False`.
price_array, tech_array, risk_array = data_downloader.df_to_array_fix(
    stock_data_df,
    tech_indicator_list=tech_indicators,
    if_vix=False,
)

In [None]:
import numpy as np

In [None]:
# Starting account value passed to the environment.
initial_account = 1e5

# Set the threshold above the largest (non-NaN) risk value so it is never
# exceeded, avoiding a forced sell-off of the whole portfolio.
risk_thresh = np.nanmax(risk_array) + 1

# Environment configuration: the three data arrays plus the training flag.
config = {
    'price_array': price_array,
    'tech_array': tech_array,
    'risk_array': risk_array,
    'if_train': True,
}

In [None]:
# Build the trading environment from the config and the account/risk settings above.
stock_env = StockTradingEnv(
    config,
    initial_account=initial_account,
    risk_thresh=risk_thresh,
)

In [None]:
# Sanity check: display the shapes of the price, technical-indicator and risk arrays.
tuple(arr.shape for arr in (price_array, tech_array, risk_array))

### Train RL agents (Stable-Baselines3 PPO, then ElegantRL PPO)

In [None]:
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv

In [None]:
# Wrap the single environment instance in the vectorised-env interface.
env_train = DummyVecEnv([lambda: stock_env])

# PPO with an MLP policy; the seed is fixed so runs are repeatable.
model = PPO(
    "MlpPolicy",
    env_train,
    learning_rate=2.5e-4,
    n_steps=2048,
    batch_size=128,
    ent_coef=0.0,
    gamma=0.99,
    seed=312,
)

In [None]:
# Train the Stable-Baselines3 PPO agent.
# `total_timesteps` is documented as an int, so pass 10_000 instead of the float 1e4.
# NOTE(review): `tb_log_name` presumably only has an effect when the model was created
# with `tensorboard_log=...`, which the PPO(...) call in this notebook does not pass — confirm.
model.learn(total_timesteps=10_000, tb_log_name='ppo')
print('Training finished!')

In [None]:
# `cwd` was used here without ever being assigned, so this cell raised NameError
# on a fresh kernel. Define the save location explicitly before using it.
# NOTE(review): the path below was chosen during review; adjust to the desired location.
cwd = './ppo_sb3'
model.save(cwd)
print('Trained model saved in ' + str(cwd))

In [None]:
from elegantrl.agent import *
from elegantrl.run import *

In [None]:
# ElegantRL run configuration: a PPO agent on the same trading environment, on-policy mode.
args = Arguments(
    agent=AgentPPO(),
    env=stock_env,
    if_on_policy=True,
)

In [None]:
# Optimisation hyperparameters (same values as the Stable-Baselines3 PPO run above).
args.learning_rate = 2.5e-4
args.batch_size = 128
args.gamma = 0.99
args.seed = 312

# Run control and model size.
# NOTE(review): `break_step` presumably caps the total training steps and
# `net_dimension` presumably sets the network width — confirm against ElegantRL.
args.break_step = 1e6
args.net_dimension = 2 ** 4

# Working directory handed to ElegantRL.
args.cwd = './testPPO'

In [None]:
# Run `train_and_evaluate` with the configured `args`.
# NOTE(review): the function presumably comes from the `elegantrl.run` wildcard import — confirm.
train_and_evaluate(args)