In [None]:
import datetime
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
matplotlib.use('Agg')

import datetime

from finrl import config
from finrl import config_tickers
from finrl.finrl_meta.preprocessor.yahoodownloader import YahooDownloader

from finrl.finrl_meta.preprocessor.preprocessors import FeatureEngineer, data_split
from finrl.agents.stablebaselines3.models import DRLAgent
from finrl.plot import backtest_stats, backtest_plot, get_daily_return, get_baseline,convert_daily_return_to_pyfolio_ts

from Environment import StockPortfolioEnv as env
import sys


import os
# Make sure the FinRL working directories exist. exist_ok=True replaces the
# "check, then create" pattern, which can fail if the directory appears in between.
for _dir in (
    config.DATA_SAVE_DIR,
    config.TRAINED_MODEL_DIR,
    config.TENSORBOARD_LOG_DIR,
    config.RESULTS_DIR,
):
    os.makedirs("./" + _dir, exist_ok=True)

# Fetch price data for the Dow 30 ticker list over the requested date range.
df = YahooDownloader(start_date = '2008-01-01',
                     end_date = '2022-06-01',
                     ticker_list = config_tickers.DOW_30_TICKER).fetch_data()

fe = FeatureEngineer(
    use_technical_indicator=True,
    use_turbulence=False,
    user_defined_feature=False
)

df = fe.preprocess_data(df)
# add covariance matrix as states
df=df.sort_values(['date','tic'],ignore_index=True)
# Index every row by the ordinal of its date so that .loc can slice whole days.
df.index = df.date.factorize()[0]
cov_list = []
return_list = []

# look back is one year
lookback=252
n_days = len(df.index.unique())  # loop-invariant, computed once
for i in range(lookback, n_days):
    # Label-based .loc slicing is inclusive on both ends: days i-lookback .. i.
    data_lookback = df.loc[i-lookback:i,:]
    price_lookback=data_lookback.pivot_table(index = 'date',columns = 'tic', values = 'close')
    # Daily returns per ticker; the first row is NaN after pct_change and is dropped.
    return_lookback = price_lookback.pct_change().dropna()
    return_list.append(return_lookback)

    covs = return_lookback.cov().values
    cov_list.append(covs)

# One covariance matrix / return frame per date, starting after the first `lookback` days.
df_cov = pd.DataFrame({'date':df.date.unique()[lookback:],'cov_list':cov_list,'return_list':return_list})
df = df.merge(df_cov, on='date')
df = df.sort_values(['date','tic']).reset_index(drop=True)
# NOTE(review): `train` is re-split with a different end date in a later cell;
# this first split is what the env-configuration cell reads its stock count from.
train = data_split(df, '2009-04-01','2016-04-01')

In [None]:
# Number of distinct tickers in the training split; the state and action
# spaces are both sized by it.
stock_dimension = len(train.tic.unique())
state_space = stock_dimension
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

# Technical-indicator columns handed to the environment.
tech_indicator_list = ['macd', 'rsi_30', 'cci_30', 'dx_30']
feature_dimension = len(tech_indicator_list)
print(f"Feature Dimension: {feature_dimension}")

# Keyword arguments shared by every environment instance built later on.
env_kwargs = dict(
    hmax=100,
    initial_amount=1000000,
    transaction_cost_pct=0,
    state_space=state_space,
    stock_dim=stock_dimension,
    tech_indicator_list=tech_indicator_list,
    action_space=stock_dimension,
    reward_scaling=1e-1,
)



In [None]:
# Chronological train / validation / test windows over the prepared frame.
train = data_split(df, '2009-04-01','2016-06-30')
validation = data_split(df,'2016-06-30','2019-06-30')
# The end bound used to be '2022-06-31', which is not a calendar date: it only
# worked while `date` was compared as a plain string and would fail to parse as
# a datetime. '2022-07-01' is a valid bound for the same window.
# NOTE(review): assumes data_split treats the end bound as exclusive - confirm.
test_set = data_split(df,'2019-07-01','2022-07-01')

# One environment per window, all sharing the same configuration.
e_train_gym = env(df = train, **env_kwargs)
e_validation_gym = env(df = validation,**env_kwargs)
e_test_gym = env(df=test_set,**env_kwargs)
# NOTE(review): the second value comes from the *training* env despite the
# `test_obs` name; the name is kept because other cells may reference it.
env_train, test_obs = e_train_gym.get_sb_env()

In [None]:
from stable_baselines3 import A2C,PPO,DDPG


# Directory holding the saved checkpoints loaded below.
MODEL_DIR = 'trained_models/5_Oct'

# One (A2C, PPO, DDPG) triple per numbered checkpoint 1..10.
trained_a2c_1 = A2C.load(MODEL_DIR + '/a2c_1')
# Fixed: this used to load 'ppo_2', so pools 1 and 2 silently shared one PPO model.
trained_ppo_1 = PPO.load(MODEL_DIR + '/ppo_1')
trained_ddpg_1 = DDPG.load(MODEL_DIR + '/ddpg_1')

trained_a2c_2 = A2C.load(MODEL_DIR + '/a2c_2')
trained_ppo_2 = PPO.load(MODEL_DIR + '/ppo_2')
trained_ddpg_2 = DDPG.load(MODEL_DIR + '/ddpg_2')

trained_a2c_3 = A2C.load(MODEL_DIR + '/a2c_3')
trained_ppo_3 = PPO.load(MODEL_DIR + '/ppo_3')
trained_ddpg_3 = DDPG.load(MODEL_DIR + '/ddpg_3')

trained_a2c_4 = A2C.load(MODEL_DIR + '/a2c_4')
trained_ppo_4 = PPO.load(MODEL_DIR + '/ppo_4')
trained_ddpg_4 = DDPG.load(MODEL_DIR + '/ddpg_4')

trained_a2c_5 = A2C.load(MODEL_DIR + '/a2c_5')
trained_ppo_5 = PPO.load(MODEL_DIR + '/ppo_5')
trained_ddpg_5 = DDPG.load(MODEL_DIR + '/ddpg_5')

trained_a2c_6 = A2C.load(MODEL_DIR + '/a2c_6')
trained_ppo_6 = PPO.load(MODEL_DIR + '/ppo_6')
trained_ddpg_6 = DDPG.load(MODEL_DIR + '/ddpg_6')

trained_a2c_7 = A2C.load(MODEL_DIR + '/a2c_7')
trained_ppo_7 = PPO.load(MODEL_DIR + '/ppo_7')
trained_ddpg_7 = DDPG.load(MODEL_DIR + '/ddpg_7')

trained_a2c_8 = A2C.load(MODEL_DIR + '/a2c_8')
trained_ppo_8 = PPO.load(MODEL_DIR + '/ppo_8')
trained_ddpg_8 = DDPG.load(MODEL_DIR + '/ddpg_8')

trained_a2c_9 = A2C.load(MODEL_DIR + '/a2c_9')
trained_ppo_9 = PPO.load(MODEL_DIR + '/ppo_9')
trained_ddpg_9 = DDPG.load(MODEL_DIR + '/ddpg_9')

# NOTE(review): 'a2c_seed_10' breaks the 'a2c_<n>' naming used everywhere else;
# left untouched because it may be the real file name on disk - confirm.
trained_a2c_10 = A2C.load(MODEL_DIR + '/a2c_seed_10')
trained_ppo_10 = PPO.load(MODEL_DIR + '/ppo_10')
trained_ddpg_10 = DDPG.load(MODEL_DIR + '/ddpg_10')

# Per-checkpoint pools, always ordered [A2C, PPO, DDPG].
pool_agent_1 = [trained_a2c_1,trained_ppo_1,trained_ddpg_1]
pool_agent_2 = [trained_a2c_2,trained_ppo_2,trained_ddpg_2]
pool_agent_3 = [trained_a2c_3,trained_ppo_3,trained_ddpg_3]
pool_agent_4 = [trained_a2c_4,trained_ppo_4,trained_ddpg_4]
pool_agent_5 = [trained_a2c_5,trained_ppo_5,trained_ddpg_5]
pool_agent_6 = [trained_a2c_6,trained_ppo_6,trained_ddpg_6]
pool_agent_7 = [trained_a2c_7,trained_ppo_7,trained_ddpg_7]
pool_agent_8 = [trained_a2c_8,trained_ppo_8,trained_ddpg_8]
pool_agent_9 = [trained_a2c_9,trained_ppo_9,trained_ddpg_9]
pool_agent_10 = [trained_a2c_10,trained_ppo_10,trained_ddpg_10]

# NOTE(review): the collections below are NOT in the same order - pool_agents
# puts pool 3 last, the A2C/PPO lists swap 3 and 4, and the DDPG list is in
# natural order. The orderings are preserved exactly so positional indexing
# elsewhere keeps returning the same objects; confirm whether this is intended.
pool_agents = [pool_agent_1,pool_agent_2,pool_agent_4,pool_agent_5,pool_agent_6,pool_agent_7,pool_agent_8,
              pool_agent_9,pool_agent_10,pool_agent_3]

trained_a2c = [trained_a2c_1,trained_a2c_2,trained_a2c_4,trained_a2c_3,trained_a2c_5,
              trained_a2c_6,trained_a2c_7,trained_a2c_8,trained_a2c_9,trained_a2c_10]

trained_ppo = [trained_ppo_1,trained_ppo_2,trained_ppo_4,trained_ppo_3,trained_ppo_5,
              trained_ppo_6,trained_ppo_7,trained_ppo_8,trained_ppo_9,trained_ppo_10]

trained_ddpg =[trained_ddpg_1,trained_ddpg_2,trained_ddpg_3,trained_ddpg_4,trained_ddpg_5,
              trained_ddpg_6,trained_ddpg_7,trained_ddpg_8,trained_ddpg_9,trained_ddpg_10]




In [None]:
def experiment_single(trained_agent,env):
    """Roll one trained agent through ``env`` and collect its trading records.

    Parameters
    ----------
    trained_agent
        Object exposing ``predict(obs, deterministic=True)`` that returns an
        ``(action, state)`` pair (e.g. a loaded Stable-Baselines3 model).
    env
        Environment exposing ``get_sb_env()`` (returning a vectorised env and
        its first observation) and a ``df`` attribute; the number of unique
        values in ``df.index`` bounds the number of steps taken.

    Returns
    -------
    tuple
        ``(account_memory, actions_memory)`` as returned by the vectorised
        env's ``save_asset_memory`` / ``save_action_memory`` methods. Both are
        empty lists if the snapshot step is never reached.
    """
    test_env, test_obs = env.get_sb_env()

    account_memory = []
    actions_memory = []
    test_env.reset()

    # Loop-invariant, so computed once instead of on every iteration.
    n_steps = len(env.df.index.unique())
    for i in range(n_steps):
        # The observation goes to predict() as-is. The previous version also
        # built an unused torch tensor here, which needed `th` from a later
        # notebook cell and raised NameError on a fresh kernel.
        action, _states = trained_agent.predict(test_obs, deterministic=True)
        test_obs, rewards, dones, info = test_env.step(np.array(action))

        # Snapshot the histories on the second-to-last index value.
        # NOTE(review): presumably because the vectorised env resets itself
        # once the episode is done - confirm against the environment.
        if i == n_steps - 2:
            account_memory = test_env.env_method(method_name="save_asset_memory")
            actions_memory = test_env.env_method(method_name="save_action_memory")
        if dones[0]:
            print("hit end!")
            break
    return account_memory,actions_memory

In [None]:
def dataCenter(actions):
    """Average the two candidate actions that lie closest to the group centre.

    While more than two candidates remain, the one with the largest Euclidean
    distance to the current element-wise mean is discarded (the first such
    candidate on ties) and the mean is recomputed. The element-wise mean of
    the survivors is returned.

    Parameters
    ----------
    actions : sequence of array-like
        Candidate actions, one per agent. Lists, tuples and ndarrays are all
        accepted, and the caller's object is never modified.

    Returns
    -------
    numpy.ndarray or numpy.floating
        Element-wise mean of the remaining (at most two) candidates. With two
        or fewer inputs this is simply their mean.
    """
    # Work on a private copy: the previous implementation popped items from
    # the caller's list and could not handle ndarray input (no ``.pop``).
    remaining = [np.asarray(candidate) for candidate in actions]

    while len(remaining) > 2:
        centre = np.mean(remaining, axis=0)
        distances = [np.linalg.norm(centre - candidate) for candidate in remaining]
        # argmax returns the first maximum, matching list.index(max(...)).
        remaining.pop(int(np.argmax(distances)))

    return np.mean(remaining, axis=0)

In [None]:
import torch as th
def test_ensemble(ensemble_type,env,trained_agents):
    test_env,test_obs = env.get_sb_env()
    
    account_memory = []
    actions_memory = []
    
    a2c_agent = trained_agents[0]
    ppo_agent = trained_agents[1]
    ddpg_agent = trained_agents[2]
    
    for s in range(len(env.df.index.unique())):
        obs_tensor=th.tensor(test_obs,requires_grad=False)#.cuda()
        obs_flatten =th.flatten(obs_tensor).cpu().detach().numpy()
        
        a2c_action = a2c_agent.predict(test_obs,deterministic=True)[0]
        ppo_action = ppo_agent.predict(test_obs,deterministic=True)[0]
        ddpg_action = ddpg_agent.predict(test_obs,deterministic=True)[0]
        
        join_prediction=np.concatenate((np.concatenate((a2c_action,ppo_action)),ddpg_action))
        #print(join_prediction)
        
        if ensemble_type=='dataCenter':
            action = dataCenter(join_prediction.tolist())
        
        elif ensemble_type == 'densityBased' :
            
            density_prediction = [calculate_density(p,join_prediction,1) for p in join_prediction]
            action = join_prediction[np.argmax(density_prediction)].tolist()
        
        else:
            action = np.mean(join_prediction,axis=0)
        #action = np.mean(a2c_array,axis=0)
        
        test_obs, rewards, dones, info = test_env.step(np.array([action]))
        #print(i)
        
        if s == (len(env.df.index.unique()) - 2):

            account_memory = test_env.env_method(method_name="save_asset_memory")
            actions_memory = test_env.env_method(method_name="save_action_memory")
    #                 state_memory=test_env.env_method(method_name="save_state_memory") # add current state to state memory
        if dones[0]:
            print("hit end!")
            break
    return account_memory,actions_memory

In [None]:
# Evaluate the 'dataCenter' ensemble on the test environment using the second
# entry of pool_agents; the returned (account_memory, actions_memory) pair is
# the cell's displayed output.
test_ensemble('dataCenter',e_test_gym,pool_agents[1])