 # Installing FinRL

In [None]:
## install finrl library
# %pip install git+https://github.com/AI4Finance-LLC/FinRL-Library.git

#TODO: must study more stock market concepts

 # Making required directories

In [None]:
from finrl import config
from finrl import config_tickers
import os
# Create every working directory taken from the FinRL config (data, trained
# models, TensorBoard logs, results). `exist_ok=True` keeps the cell idempotent
# and removes the separate exists-then-create check for each directory.
for _dir_name in (
    config.DATA_SAVE_DIR,
    config.TRAINED_MODEL_DIR,
    config.TENSORBOARD_LOG_DIR,
    config.RESULTS_DIR,
):
    os.makedirs("./" + _dir_name, exist_ok=True)

 # Importing libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
# matplotlib.use('Agg')
import datetime

# %matplotlib inline
#from finrl.finrl_meta.preprocessor.yahoodownloader import YahooDownloader
from finrl.finrl_meta.preprocessor.preprocessors import FeatureEngineer, data_split
from finrl.finrl_meta.env_stock_trading.env_stocktrading import StockTradingEnv
from finrl.agents.stablebaselines3.models import DRLAgent
from finrl.finrl_meta.data_processor import DataProcessor

from finrl.plot import backtest_stats, backtest_plot, get_daily_return, get_baseline
from pprint import pprint

import sys
# NOTE(review): this path is appended after the `finrl` imports above have already
# run, so it cannot influence how they were resolved — confirm it is still needed.
sys.path.append("../FinRL-Library")

import itertools
import torch

 # Reading stock prices

In [None]:
# Each securities code identifies one stock; only these are kept for the experiment.
SELECTED_SECURITIES_CODES = [1301, 1332, 1333, 1376, 1377, 1379, 1381, 1407, 1414, 1417]

# Load the full price history, then keep the rows of the selected stocks.
df = pd.read_csv('stock_prices.csv.gz')
is_selected = df['SecuritiesCode'].isin(SELECTED_SECURITIES_CODES)
df_2 = df.loc[is_selected]

In [None]:
# Show the rows kept for the selected securities codes.
df_2

 # Extracting the required columns for Feature Engineering

In [None]:
# Keep only the date, OHLCV and securities-code columns.
FEATURE_COLUMNS = ['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'SecuritiesCode']
df_c = df_2[FEATURE_COLUMNS]

# Persist the selection to CSV with a header row.
df_c.to_csv('export_dataframe.csv', header=True, index=None)

In [None]:
# Inspect the dtype of each extracted column.
result = df_c.dtypes
result

 # Exporting the extracted columns to a new CSV

In [None]:
# Read the exported CSV back into a fresh frame and display it.
df_3 = pd.read_csv('export_dataframe.csv')
df_3

 # Converting the Securities Code from int64 to string object

In [None]:
# Replace every securities code with its `str(...)` form.
df_3['SecuritiesCode'] = df_3['SecuritiesCode'].map(str)

In [None]:
# Show the last 200 rows once ordered by date and then by securities code.
sort_keys = ['Date', 'SecuritiesCode']
df_3_sorted = df_3.sort_values(by=sort_keys, ignore_index=True)
df_3_sorted.tail(200)

In [None]:
# Inspect the column dtypes of `df_3` again.
result_4 = df_3.dtypes
result_4

 # Renaming the columns to the names required by Feature Engineering

In [None]:
# Map the dataset's column names to the lower-case names (and `tic`) used from here on.
COLUMN_RENAMES = {
    'Date': 'date',
    'Open': 'open',
    'High': 'high',
    'Low': 'low',
    'Close': 'close',
    'Volume': 'volume',
    'SecuritiesCode': 'tic',
}
df_3 = df_3.rename(columns=COLUMN_RENAMES)
df_3

 # Preprocessing
 ## Applying Feature Engineering on the dataset

In [None]:
# Feature engineer with technical indicators, VIX and turbulence switched on
# and user-defined features switched off.
feature_engineer_options = dict(
    use_technical_indicator=True,
    tech_indicator_list=config.INDICATORS,
    use_vix=True,
    use_turbulence=True,
    user_defined_feature=False,
)
fe = FeatureEngineer(**feature_engineer_options)

# Run the configured preprocessing over the renamed price frame.
processed = fe.preprocess_data(df_3)

In [None]:
# Every ticker crossed with every calendar day between the first and last date.
list_ticker = processed["tic"].unique().tolist()
first_day, last_day = processed['date'].min(), processed['date'].max()
list_date = list(pd.date_range(first_day, last_day).astype(str))
combination = list(itertools.product(list_date, list_ticker))

# Left-join the features onto that grid, keep only dates that occur in
# `processed`, order by (date, tic) and fill the remaining gaps with 0.
grid = pd.DataFrame(combination, columns=["date", "tic"])
processed_full = (
    grid
    .merge(processed, on=["date", "tic"], how="left")
    .loc[lambda frame: frame['date'].isin(processed['date'])]
    .sort_values(['date', 'tic'])
    .fillna(0)
)

In [None]:
# `processed_full` is already built by the preceding cell; this cell used to repeat
# that computation verbatim. Instead of recomputing, check what the construction
# guarantees: no missing values remain and every retained date has one row per ticker.
assert not processed_full.isna().any().any(), "processed_full still contains NaN values"

_tickers_per_date = processed_full.groupby("date")["tic"].nunique()
_expected_tickers = len(processed["tic"].unique())
assert (_tickers_per_date == _expected_tickers).all(), "some dates are missing tickers"

 # Splitting the data

In [None]:
# Date windows for the two splits: training first, trading (evaluation) after.
TRAIN_START, TRAIN_END = '2017-01-01', '2020-07-01'
TRADE_START, TRADE_END = '2020-07-01', '2021-10-31'

train = data_split(processed_full, TRAIN_START, TRAIN_END)
trade = data_split(processed_full, TRADE_START, TRADE_END)

 # Defining the Environment

In [None]:
# One slot, plus two slots per stock, plus one slot per (indicator, stock) pair.
# NOTE(review): presumably cash + price and holdings per stock — confirm against StockTradingEnv.
stock_dimension = len(train["tic"].unique())
indicator_count = len(config.INDICATORS)
state_space = 1 + stock_dimension * (2 + indicator_count)
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

In [None]:
# Indicator names from the FinRL config (the same list is given to the feature engineer and the env).
config.INDICATORS

In [None]:
# Distinct tickers present in the training split.
train.tic.unique()

In [None]:
import random

# Single seed applied to the torch, Python and NumPy random number generators.
SEED = 33

for _seed_rng in (torch.manual_seed, random.seed, np.random.seed):
    _seed_rng(SEED)

In [None]:
# Keyword arguments handed to StockTradingEnv.
# `num_stock_shares = [0] * stock_dimension` follows
# https://github.com/AI4Finance-Foundation/FinRL/blob/master/FinRL_StockTrading_NeurIPS_2018.ipynb
env_kwargs = dict(
    hmax=100,
    initial_amount=130000000,  # In YEN
    # hold no shares at the beginning
    num_stock_shares=[0 for _ in range(stock_dimension)],
    # buy and sell cost for each stock
    buy_cost_pct=[0.001 for _ in range(stock_dimension)],
    sell_cost_pct=[0.001 for _ in range(stock_dimension)],
    state_space=state_space,
    stock_dim=stock_dimension,
    tech_indicator_list=config.INDICATORS,
    action_space=stock_dimension,
    reward_scaling=1e-4,
)
import gym
class ObservationWrapper(gym.ObservationWrapper):
    """Observation wrapper that returns every observation as a NumPy array.

    Construction is inherited unchanged from `gym.ObservationWrapper`.
    """

    def observation(self, obs):
        """Return `obs` converted with `np.array`."""
        as_array = np.array(obs)
        return as_array

# Build the training environment, wrap it so observations come back as arrays, then seed it.
train_env_raw = StockTradingEnv(df=train, **env_kwargs)
e_train_gym = ObservationWrapper(train_env_raw)
e_train_gym.seed(SEED)

In [None]:
# Action space exposed by the wrapped training environment.
e_train_gym.action_space

In [None]:
import d3rlpy

In [None]:
# Fill a replay buffer with transitions gathered by a random policy, then export
# the buffer as an MDPDataset and save it to disk.
BUFFER_LENGTH = 823
RANDOM_DATASET_PATH = "random_policy_dataset.h5"

random_policy = d3rlpy.algos.RandomPolicy()
buffer = d3rlpy.online.buffers.ReplayBuffer(env=e_train_gym, maxlen=BUFFER_LENGTH)
random_policy.collect(e_train_gym, buffer, n_steps=BUFFER_LENGTH)

dataset = buffer.to_mdp_dataset()
dataset.dump(RANDOM_DATASET_PATH)

In [None]:
# `MDPDataset.load` is a classmethod that returns a new dataset instead of filling
# the instance it is called on, so the result has to be bound to a name. Calling it
# on the class also lets this cell run without the collection cell above.
dataset = d3rlpy.dataset.MDPDataset.load("random_policy_dataset.h5")

In [None]:
# Encoder and optimizer factories shared by every CQL component.
from d3rlpy.models.encoders import VectorEncoderFactory
encoder_factory = VectorEncoderFactory(hidden_units=[256, 256, 256])
optim_factory = d3rlpy.models.optimizers.RMSpropFactory()

# Use the GPU only when CUDA is available so the notebook also runs on CPU-only
# machines (a hard-coded `use_gpu=True` presumably fails there — confirm).
cql = d3rlpy.algos.CQL(scaler='standard', 
    use_gpu=torch.cuda.is_available(),
    gamma=0.95, 
    batch_size=128, 
    actor_encoder_factory = encoder_factory, 
    critic_encoder_factory = encoder_factory, 
    actor_optim_factory = optim_factory,
    critic_optim_factory = optim_factory,
    temp_optim_factory = optim_factory,
    alpha_optim_factory = optim_factory,
    actor_learning_rate=5e-5, 
    critic_learning_rate=1e-4)
# Alternative algorithm kept for reference:
# iql = d3rlpy.algos.IQL(scaler='standard', 
#     use_gpu=True, 
#     gamma=0.96, 
#     batch_size=128, 
#     actor_encoder_factory = encoder_factory, 
#     critic_encoder_factory = encoder_factory, 
#     value_encoder_factory = encoder_factory,
#     actor_optim_factory = optim_factory,
#     critic_optim_factory = optim_factory,
#     actor_learning_rate=4e-5,
#     critic_learning_rate=8e-5,
#     )

# Number of epochs per training round (reused by the later collect/retrain loop).
BASE_EPOCHS = 15

# Initial training round on the dataset, fed as an MDPDataset.
cql.fit(dataset, n_epochs=BASE_EPOCHS, shuffle=True, verbose=False)

In [None]:
# Step budget across all collection rounds; each round gathers BUFFER_LENGTH steps.
# Deriving the round count from BUFFER_LENGTH (instead of repeating its literal
# value) keeps the two in sync if the buffer size changes.
TOTAL_COLLECTED_STEPS = 50000
NUM_COLLECTIONS = TOTAL_COLLECTED_STEPS // BUFFER_LENGTH
NUM_CHKPOINTS = BASE_EPOCHS
for _ in range(NUM_COLLECTIONS - 1):  # -1 because the initial collection already counts as one round
    # collect more data with the current policy
    cql.collect(e_train_gym, buffer, n_steps=BUFFER_LENGTH)
    # extend the dataset with the newly collected transitions
    dataset.extend(buffer.to_mdp_dataset())
    # retrain on the grown dataset
    cql.fit(dataset, n_epochs=BASE_EPOCHS, save_interval=NUM_CHKPOINTS, verbose=False)

In [None]:
# Rows dated from 2009-01-01 (inclusive) up to 2020-07-01 (exclusive) ...
on_or_after_start = processed_full['date'] >= '2009-01-01'
before_end = processed_full['date'] < '2020-07-01'
data_risk_indicator = processed_full[before_end & on_or_after_start]

# ... reduced to the first row of each date.
insample_risk_indicator = data_risk_indicator.drop_duplicates(subset=['date'])

In [None]:
# Summary statistics of the in-sample `vix` column.
insample_risk_indicator.vix.describe()

In [None]:
# 0.996 quantile of the in-sample `vix` column.
insample_risk_indicator.vix.quantile(0.996)

In [None]:
# Summary statistics of the in-sample `turbulence` column.
insample_risk_indicator.turbulence.describe()

In [None]:
# 0.996 quantile of the in-sample `turbulence` column.
insample_risk_indicator.turbulence.quantile(0.996)

In [None]:
# Trading (evaluation) environment built over the `trade` split with the same
# base settings as the training environment.
# NOTE(review): `turbulence_threshold` is presumably compared against the column
# named by `risk_indicator_col` ('vix') — confirm in StockTradingEnv.
#trade = data_split(processed_full, '2020-07-01','2021-10-31')
e_trade_gym = ObservationWrapper(StockTradingEnv(df = trade, turbulence_threshold = 70,risk_indicator_col='vix', **env_kwargs))
# Seed the environment that was just created; the original re-seeded the training env here.
e_trade_gym.seed(SEED)
# env_trade, obs_trade = e_trade_gym.get_sb_env()

In [None]:
# reload model
# cql = d3rlpy.algos.cql.CQL.from_json("./CQL_20220606173412/params.json")

In [None]:
class DRLAgentLessArg(DRLAgent):
    """DRLAgent variant whose `DRL_prediction` takes only a model and an environment."""

    @staticmethod
    def DRL_prediction(model, environment):
        """Roll `model` through `environment` and return the saved memories.

        Args:
            model: object exposing `predict(obs)`; its result is passed
                unchanged to the vectorized environment's `step`.
            environment: environment exposing `get_sb_env()` and a `df`
                attribute whose unique index values bound the rollout length.

        Returns:
            Tuple `(account_memory, actions_memory)`: the first element of
            what the environment's `save_asset_memory` and
            `save_action_memory` methods return.

        Raises:
            IndexError: if the rollout finished before the memories were
                captured (fewer than two index values, or an early `done`).
        """
        test_env, test_obs = environment.get_sb_env()
        account_memory = []
        actions_memory = []
        test_env.reset()

        # Loop bound is invariant during the rollout, so compute it once.
        n_steps = len(environment.df.index.unique())
        for i in range(n_steps):
            action = model.predict(test_obs)
            test_obs, rewards, dones, info = test_env.step(action)
            # Memories are captured one step before the final index value.
            # NOTE(review): presumably because the vectorized env resets itself
            # once `done` is reached, which would clear them — confirm.
            if i == n_steps - 2:
                account_memory = test_env.env_method(method_name="save_asset_memory")
                actions_memory = test_env.env_method(method_name="save_action_memory")
            if dones[0]:
                print("hit end!")
                break

        if not account_memory or not actions_memory:
            # Same exception type the bare `[0]` lookup would raise, with a usable message.
            raise IndexError(
                "DRL_prediction ended before account/action memories were captured "
                f"(rollout length {n_steps})"
            )
        return account_memory[0], actions_memory[0]

# Roll the trained CQL model through the trading environment and unpack the
# returned account-value and action memories.
prediction = DRLAgentLessArg.DRL_prediction(model=cql, environment=e_trade_gym)
df_account_value, df_actions = prediction

In [None]:
# (rows, columns) of the account-value frame returned by the prediction run.
df_account_value.shape

In [None]:
# Last rows of the account-value frame.
df_account_value.tail()

In [None]:
# import pyfolio
# with pyfolio.plotting.plotting_context(font_scale=1.1):
#     pyfolio.create_full_tear_sheet(returns = df_account_value,
#     benchmark_rets=dow_strat, set_context=False)

In [None]:
print("==============Get Backtest Results===========")
# Timestamp that makes each stats file name unique to the minute.
now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')

# Compute the backtest statistics, wrap them in a DataFrame and store them
# under the results directory.
perf_stats_all = pd.DataFrame(backtest_stats(account_value=df_account_value))
perf_stats_path = f"./{config.RESULTS_DIR}/perf_stats_all_{now}.csv"
perf_stats_all.to_csv(perf_stats_path)

In [None]:
# Index the account values by their `date` column (returns a new frame).
df_date = df_account_value.set_index("date")

In [None]:
# Parse the index values into datetimes, replacing the index on `df_date`.
df_date.index = pd.to_datetime(df_date.index)

In [None]:
%matplotlib inline

In [None]:
# Plot the date-indexed account values with grid lines.
df_date.plot(grid=True)

In [None]:
# Take the seed in the file name from SEED (instead of a hard-coded 33) so the
# saved artefact always matches the seed the run actually used.
df_date.to_csv(f"CQL_account_value_seed_{SEED}.csv.gz")

In [None]:
# Render FinRL's backtest plot for the account-value history.
backtest_plot(account_value=df_account_value)