In [None]:
import sys

# Put the parent directory on the import path.
# NOTE(review): presumably this is what makes the `neo_finrl` package importable
# from this notebook's folder — confirm.
# The membership guard keeps the cell idempotent: re-running it no longer stacks
# duplicate '../' entries onto sys.path.
if '../' not in sys.path:
    sys.path.append('../')

In [None]:
import yaml
import pandas as pd

In [None]:
# Load the ticker configuration; its 'tic' entry is later handed to the downloader.
# - yaml.safe_load builds only plain Python objects and, unlike a bare
#   yaml.load(stream), needs no explicit Loader argument (PyYAML >= 6 raises
#   TypeError when Loader is omitted).
# - The context manager closes the file handle as soon as parsing is done.
with open('ticlist.yml', 'r') as tic_file:
    tic_list = yaml.safe_load(tic_file)

In [None]:
# Date range (as strings) passed to the data provider's download_data call.
start_date = '2009-01-01'
end_date = '2021-08-30'

In [None]:
# Position at which the price/tech/risk arrays are cut:
# [:index] feeds the training env, [index:] feeds the backtest env.
train_test_split_index = 3000

In [None]:
# CSV path the raw downloaded history is written to; set to None to skip that dump.
history_df_name = './hist_px.csv'
# Path the trained PPO model is saved to and later re-loaded from for backtesting.
cwd = './testPPO/model.pkl'

In [None]:
# Technical indicator names requested from the data processor and reused when
# converting the processed frame into arrays.
tech_indicators = [
    'macd',
    'rsi_30',
    'cci_30',
    'dx_30',
]

### Data provider and transformer

In [None]:
from neo_finrl.data_processors.processor_yahoofinance import YahooFinanceProcessor

In [None]:
# Single processor instance used below for download, cleaning, feature
# engineering and the frame-to-array conversion.
data_downloader = YahooFinanceProcessor()

### Data Extraction 

In [None]:
# Fetch price history for every ticker listed under 'tic' in the YAML config,
# over [start_date, end_date], with the '1D' interval argument.
stock_history_df = data_downloader.download_data(
    start_date,
    end_date,
    tic_list['tic'],
    '1D',
)

In [None]:
# Persist the raw download when a target path is configured.
# `is not None` is the identity test PEP 8 recommends for None; `!= None` goes
# through __ne__ and can be overridden by the compared object.
if history_df_name is not None:
    stock_history_df.to_csv(history_df_name, index=False)

In [None]:
# Simple currency hack: price columns of tickers ending in '.SI' are divided by
# 1.3 and those ending in '.HK' by 7.8.
# NOTE(review): presumably fixed SGD->USD and HKD->USD rates — confirm.
# The frame is modified in place, so re-running this cell divides the prices again.
price_columns = ['open', 'high', 'low', 'close', 'adjcp']
suffix_divisors = (('.SI', 1.3), ('.HK', 7.8))

for price_col in price_columns:
    for tic_suffix, divisor in suffix_divisors:
        # Boolean row selector for tickers carrying this exchange suffix.
        suffix_rows = stock_history_df.tic.str.endswith(tic_suffix)
        stock_history_df.loc[suffix_rows, price_col] = (
            stock_history_df.loc[suffix_rows, price_col] / divisor
        )

### Preprocess data

In [None]:
# Three processor stages applied in sequence:
# clean_data -> add_technical_indicator -> add_turbulence.
cleaned_history_df = data_downloader.clean_data(stock_history_df)
indicator_df = data_downloader.add_technical_indicator(cleaned_history_df, tech_indicators)
stock_data_df = data_downloader.add_turbulence(indicator_df)

In [None]:
# Write the processed frame to disk (row index omitted) so it can be re-read below.
stock_data_df.to_csv('cleaned_stock.csv', index = False)

### Create the training environment

In [None]:
# Re-load the processed frame from the CSV written by the previous section.
stock_data_df = pd.read_csv('cleaned_stock.csv')

In [None]:
# Convert the processed frame into the three arrays the trading env consumes.
# The call is already inside parentheses, so no backslash continuations are needed.
price_array, tech_array, risk_array = data_downloader.df_to_array_fix(
    stock_data_df,
    tech_indicator_list=tech_indicators,
    if_vix=False,
)

In [None]:
import numpy as np

In [None]:
from neo_finrl.env_stock_trading.env_stock_trading import StockTradingEnv

In [None]:
# Training environment configuration: everything before the split index.
config = {
    'price_array': price_array[:train_test_split_index],
    'tech_array': tech_array[:train_test_split_index],
    'risk_array': risk_array[:train_test_split_index],
    'if_train': True,
}

# Starting account value handed to the environment.
initial_account = 1e5
# Threshold placed one above the largest (non-NaN) risk value in the full array,
# so it is never reached — set high to avoid a whole-portfolio sell-off.
risk_thresh = np.nanmax(risk_array) + 1

In [None]:
# Sanity check: display the shapes of the three training arrays.
tuple(config[key].shape for key in ('price_array', 'tech_array', 'risk_array'))

In [None]:
# Training environment built from the pre-split arrays.
stock_env = StockTradingEnv(
    config,
    initial_account=initial_account,
    risk_thresh=risk_thresh,
)

### Train the RL agent (PPO)

In [None]:
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv

In [None]:
# Wrap the single training env in the vectorised interface PPO is given here.
env_train = DummyVecEnv([lambda: stock_env])

# PPO agent with an MLP policy; the seed is fixed for repeatable training runs.
model = PPO(
    "MlpPolicy",
    env_train,
    learning_rate=2.5e-4,
    n_steps=2048,
    batch_size=128,
    ent_coef=0.0,
    gamma=0.99,
    seed=312,
)

In [None]:
# Train the agent. stable-baselines3 declares total_timesteps as an int, so pass
# an integer literal instead of the float 1e4.
# NOTE(review): tb_log_name presumably only has an effect when the model was
# created with tensorboard_log=...; confirm whether TensorBoard logging is wanted.
model.learn(total_timesteps=10_000, tb_log_name='ppo')
print('Training finished!')

In [None]:
# Persist the trained agent at the configured path and report where it went.
model.save(cwd)
print(f'Trained model saved in {cwd}')

### Backtesting

In [None]:
#test on the testing env
def testRun(model, env_instance):
    state = env_instance.reset()
    episode_returns = list()  # the cumulative_return / initial_account
    done = False

    while not done:
        action = model.predict(state)[0]
        state, reward, done, _ = env_instance.step(action)
    
        total_asset = env_instance.amount + (env_instance.price_ary[env_instance.day] * env_instance.stocks).sum()
        episode_return = total_asset / env_instance.initial_total_asset
        episode_returns.append(episode_return)
        
    print('episode_return', episode_return)
    print('Test Finished!')  
    return episode_returns

In [None]:
# Backtest environment configuration: everything from the split index onward.
test_config = {
    'price_array': price_array[train_test_split_index:],
    'tech_array': tech_array[train_test_split_index:],
    'risk_array': risk_array[train_test_split_index:],
    'if_train': False,
}

# Same starting account value and risk threshold as used for training.
initial_account = 1e5
# One above the largest (non-NaN) risk value, so the threshold is never reached —
# set high to avoid a whole-portfolio sell-off.
risk_thresh = np.nanmax(risk_array) + 1

In [None]:
# Held-out environment for the backtest.
test_env = StockTradingEnv(
    test_config,
    initial_account=initial_account,
    risk_thresh=risk_thresh,
)
# Re-load the agent from the path it was saved to after training.
test_model = PPO.load(cwd)

In [None]:
# Per-step ratio of total asset value to the initial total asset over the test episode.
cumulative_return = testRun(test_model, test_env)

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Plot the backtest curve returned by testRun (one point per environment step).
# NOTE(review): the 'agent return' label is only rendered if plt.legend() is
# called — confirm whether a legend is wanted.
plt.plot(cumulative_return, label='agent return')
plt.grid()
plt.title('cumulative return')
plt.xlabel('time')