In [50]:
import numpy as np
import pandas as pd
from gym.utils import seeding
import gym
from gym import spaces
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import pickle

# shares normalization factor
# 100 shares per trade
HMAX_NORMALIZE = 100
# initial amount of money we have in our account
INITIAL_ACCOUNT_BALANCE = 1000000
# total number of stocks in our portfolio
STOCK_DIM = 30
# transaction fee: 1/1000 reasonable percentage
TRANSACTION_FEE_PERCENT = 0.001

# turbulence index: 90-150 reasonable threshold
# TURBULENCE_THRESHOLD = 140
REWARD_SCALING = 1e-4

class StockEnvTrade(gym.Env):
    """A stock trading environment for OpenAI gym"""
    metadata = {'render.modes': ['human']}

    def __init__(self, df, day=0, turbulence_threshold=140, initial=True, previous_state=None, model_name='', iteration=''):
        self.day = day
        self.df = df
        self.initial = initial
        self.previous_state = previous_state if previous_state is not None else [INITIAL_ACCOUNT_BALANCE] + [0] * (STOCK_DIM * 2)
        self.action_space = spaces.Box(low=-1, high=1, shape=(STOCK_DIM,))
        self.observation_space = spaces.Box(low=0, high=np.inf, shape=(181,))
        self.data = self.df.loc[self.day,:]
        self.terminal = False
        self.turbulence_threshold = turbulence_threshold
        self.state = [INITIAL_ACCOUNT_BALANCE] + self.data.adjcp.values.tolist() + [0]*STOCK_DIM + self.data.macd.values.tolist() + self.data.rsi.values.tolist() + self.data.cci.values.tolist() + self.data.adx.values.tolist()
        self.reward = 0
        self.turbulence = 0
        self.cost = 0
        self.trades = 0
        self.asset_memory = [INITIAL_ACCOUNT_BALANCE]
        self.rewards_memory = []
        self.model_name = model_name
        self.iteration = iteration
        self._seed()

    def _sell_stock(self, index, action):
        if self.turbulence < self.turbulence_threshold:
            if self.state[index+STOCK_DIM+1] > 0:
                self.state[0] += self.state[index+1]*min(abs(action), self.state[index+STOCK_DIM+1]) * (1 - TRANSACTION_FEE_PERCENT)
                self.state[index+STOCK_DIM+1] -= min(abs(action), self.state[index+STOCK_DIM+1])
                self.cost += self.state[index+1]*min(abs(action), self.state[index+STOCK_DIM+1]) * TRANSACTION_FEE_PERCENT
                self.trades += 1
        else:
            if self.state[index+STOCK_DIM+1] > 0:
                self.state[0] += self.state[index+1]*self.state[index+STOCK_DIM+1]* (1 - TRANSACTION_FEE_PERCENT)
                self.state[index+STOCK_DIM+1] = 0
                self.cost += self.state[index+1]*self.state[index+STOCK_DIM+1]* TRANSACTION_FEE_PERCENT
                self.trades += 1
    
    def _buy_stock(self, index, action):
        if self.turbulence < self.turbulence_threshold:
            available_amount = self.state[0] // self.state[index+1]
            self.state[0] -= self.state[index+1]*min(available_amount, action)* (1 + TRANSACTION_FEE_PERCENT)
            self.state[index+STOCK_DIM+1] += min(available_amount, action)
            self.cost += self.state[index+1]*min(available_amount, action)* TRANSACTION_FEE_PERCENT
            self.trades += 1
    
    def step(self, actions):
        self.terminal = self.day >= len(self.df.index.unique()) - 1
        if self.terminal:
            plt.plot(self.asset_memory, 'r')
            plt.savefig('/kaggle/working/account_value_trade_{}_{}.png'.format(self.model_name, self.iteration))
            plt.close()
            df_total_value = pd.DataFrame(self.asset_memory)
            df_total_value.to_csv('/kaggle/working/account_value_trade_{}_{}.csv'.format(self.model_name, self.iteration))
            end_total_asset = self.state[0] + sum(np.array(self.state[1:(STOCK_DIM+1)])*np.array(self.state[(STOCK_DIM+1):(STOCK_DIM*2+1)]))
            print("previous_total_asset:{}".format(self.asset_memory[0]))
            print("end_total_asset:{}".format(end_total_asset))
            print("total_reward:{}".format(end_total_asset - self.asset_memory[0]))
            print("total_cost: ", self.cost)
            print("total trades: ", self.trades)
            df_total_value.columns = ['account_value']
            df_total_value['daily_return'] = df_total_value.pct_change(1)
            sharpe = (4**0.5)*df_total_value['daily_return'].mean() / df_total_value['daily_return'].std()
            print("Sharpe: ", sharpe)
            df_rewards = pd.DataFrame(self.rewards_memory)
            df_rewards.to_csv('/kaggle/working/account_rewards_trade_{}_{}.csv'.format(self.model_name, self.iteration))
            return self.state, self.reward, self.terminal, {}
        else:
            actions = actions * HMAX_NORMALIZE
            if self.turbulence >= self.turbulence_threshold:
                actions = np.array([-HMAX_NORMALIZE]*STOCK_DIM)
            begin_total_asset = self.state[0] + sum(np.array(self.state[1:(STOCK_DIM+1)])*np.array(self.state[(STOCK_DIM+1):(STOCK_DIM*2+1)]))
            argsort_actions = np.argsort(actions)
            sell_index = argsort_actions[:np.where(actions < 0)[0].shape[0]]
            buy_index = argsort_actions[::-1][:np.where(actions > 0)[0].shape[0]]
            for index in sell_index:
                self._sell_stock(index, actions[index])
            for index in buy_index:
                self._buy_stock(index, actions[index])
            self.day += 1
            self.data = self.df.loc[self.day,:]
            self.turbulence = self.data['turbulence'].values[0]
            self.state = [self.state[0]] + self.data.adjcp.values.tolist() + list(self.state[(STOCK_DIM+1):(STOCK_DIM*2+1)]) + self.data.macd.values.tolist() + self.data.rsi.values.tolist() + self.data.cci.values.tolist() + self.data.adx.values.tolist()
            end_total_asset = self.state[0] + sum(np.array(self.state[1:(STOCK_DIM+1)])*np.array(self.state[(STOCK_DIM+1):(STOCK_DIM*2+1)]))
            self.asset_memory.append(end_total_asset)
            self.reward = end_total_asset - begin_total_asset
            self.rewards_memory.append(self.reward)
            self.reward = self.reward * REWARD_SCALING
            return self.state, self.reward, self.terminal, {}

    def reset(self):
        if self.initial or not self.previous_state:
            self.state = [INITIAL_ACCOUNT_BALANCE] + self.data.adjcp.values.tolist() + [0]*STOCK_DIM + self.data.macd.values.tolist() + self.data.rsi.values.tolist() + self.data.cci.values.tolist() + self.data.adx.values.tolist()
            self.asset_memory = [INITIAL_ACCOUNT_BALANCE]
        else:
            if self.previous_state is None:
                self.previous_state = [INITIAL_ACCOUNT_BALANCE] + [0] * STOCK_DIM * 2  
            previous_total_asset = self.previous_state[0] + sum(np.array(self.previous_state[1:(STOCK_DIM+1)])*np.array(self.previous_state[(STOCK_DIM+1):(STOCK_DIM*2+1)]))
            self.asset_memory = [previous_total_asset]
            self.state = [self.previous_state[0]] + self.data.adjcp.values.tolist() + self.previous_state[(STOCK_DIM+1):(STOCK_DIM*2+1)] + self.data.macd.values.tolist() + self.data.rsi.values.tolist() + self.data.cci.values.tolist() + self.data.adx.values.tolist()
        self.day = 0
        self.data = self.df.loc[self.day,:]
        self.turbulence = 0
        self.cost = 0
        self.trades = 0
        self.terminal = False
        self.rewards_memory = []
        return self.state

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]


In [30]:


class StockEnvTrain(gym.Env):
    """A stock trading environment for OpenAI gym"""
    metadata = {'render.modes': ['human']}

    def __init__(self, df,day = 0):
        #super(StockEnv, self).__init__()
        #money = 10 , scope = 1
        self.day = day
        self.df = df

        # action_space normalization and shape is STOCK_DIM
        self.action_space = spaces.Box(low = -1, high = 1,shape = (STOCK_DIM,)) 
        # Shape = 181: [Current Balance]+[prices 1-30]+[owned shares 1-30] 
        # +[macd 1-30]+ [rsi 1-30] + [cci 1-30] + [adx 1-30]
        self.observation_space = spaces.Box(low=0, high=np.inf, shape = (181,))
        # load data from a pandas dataframe
        self.data = self.df.loc[self.day,:]
        self.terminal = False             
        # initalize state
        self.state = [INITIAL_ACCOUNT_BALANCE] + \
                      self.data.adjcp.values.tolist() + \
                      [0]*STOCK_DIM + \
                      self.data.macd.values.tolist() + \
                      self.data.rsi.values.tolist() + \
                      self.data.cci.values.tolist() + \
                      self.data.adx.values.tolist()
        # initialize reward
        self.reward = 0
        self.cost = 0
        # memorize all the total balance change
        self.asset_memory = [INITIAL_ACCOUNT_BALANCE]
        self.rewards_memory = []
        self.trades = 0
        #self.reset()
        self._seed()


    def _sell_stock(self, index, action):
        # perform sell action based on the sign of the action
        if self.state[index+STOCK_DIM+1] > 0:
            #update balance
            self.state[0] += \
            self.state[index+1]*min(abs(action),self.state[index+STOCK_DIM+1]) * \
             (1- TRANSACTION_FEE_PERCENT)

            self.state[index+STOCK_DIM+1] -= min(abs(action), self.state[index+STOCK_DIM+1])
            self.cost +=self.state[index+1]*min(abs(action),self.state[index+STOCK_DIM+1]) * \
             TRANSACTION_FEE_PERCENT
            self.trades+=1
        else:
            pass

    
    def _buy_stock(self, index, action):
        # perform buy action based on the sign of the action
        available_amount = self.state[0] // self.state[index+1]
        # print('available_amount:{}'.format(available_amount))

        #update balance
        self.state[0] -= self.state[index+1]*min(available_amount, action)* \
                          (1+ TRANSACTION_FEE_PERCENT)

        self.state[index+STOCK_DIM+1] += min(available_amount, action)

        self.cost+=self.state[index+1]*min(available_amount, action)* \
                          TRANSACTION_FEE_PERCENT
        self.trades+=1
        
    def step(self, actions):
        # print(self.day)
        self.terminal = self.day >= len(self.df.index.unique())-1
        # print(actions)

        if self.terminal:
            plt.plot(self.asset_memory,'r')
            plt.savefig('/kaggle/working/account_value_train.png')
            plt.close()
            end_total_asset = self.state[0]+ \
            sum(np.array(self.state[1:(STOCK_DIM+1)])*np.array(self.state[(STOCK_DIM+1):(STOCK_DIM*2+1)]))
            
            #print("end_total_asset:{}".format(end_total_asset))
            df_total_value = pd.DataFrame(self.asset_memory)
            df_total_value.to_csv('/kaggle/working/account_value_train.csv')
            #print("total_reward:{}".format(self.state[0]+sum(np.array(self.state[1:(STOCK_DIM+1)])*np.array(self.state[(STOCK_DIM+1):61]))- INITIAL_ACCOUNT_BALANCE ))
            #print("total_cost: ", self.cost)
            #print("total_trades: ", self.trades)
            df_total_value.columns = ['account_value']
            df_total_value['daily_return']=df_total_value.pct_change(1)
            sharpe = (252**0.5)*df_total_value['daily_return'].mean()/ \
                  df_total_value['daily_return'].std()
            #print("Sharpe: ",sharpe)
            #print("=================================")
            df_rewards = pd.DataFrame(self.rewards_memory)
            #df_rewards.to_csv('/kaggle/working/account_rewards_train.csv')
            
            # print('total asset: {}'.format(self.state[0]+ sum(np.array(self.state[1:29])*np.array(self.state[29:]))))
            #with open('obs.pkl', 'wb') as f:  
            #    pickle.dump(self.state, f)
            
            return self.state, self.reward, self.terminal,{}

        else:
            # print(np.array(self.state[1:29]))

            actions = actions * HMAX_NORMALIZE
            #actions = (actions.astype(int))
            
            begin_total_asset = self.state[0]+ \
            sum(np.array(self.state[1:(STOCK_DIM+1)])*np.array(self.state[(STOCK_DIM+1):(STOCK_DIM*2+1)]))
            #print("begin_total_asset:{}".format(begin_total_asset))
            
            argsort_actions = np.argsort(actions)
            
            sell_index = argsort_actions[:np.where(actions < 0)[0].shape[0]]
            buy_index = argsort_actions[::-1][:np.where(actions > 0)[0].shape[0]]

            for index in sell_index:
                # print('take sell action'.format(actions[index]))
                self._sell_stock(index, actions[index])

            for index in buy_index:
                # print('take buy action: {}'.format(actions[index]))
                self._buy_stock(index, actions[index])

            self.day += 1
            self.data = self.df.loc[self.day,:]         
            #load next state
            # print("stock_shares:{}".format(self.state[29:]))
            self.state =  [self.state[0]] + \
                    self.data.adjcp.values.tolist() + \
                    list(self.state[(STOCK_DIM+1):(STOCK_DIM*2+1)]) + \
                    self.data.macd.values.tolist() + \
                    self.data.rsi.values.tolist() + \
                    self.data.cci.values.tolist() + \
                    self.data.adx.values.tolist()
            
            end_total_asset = self.state[0]+ \
            sum(np.array(self.state[1:(STOCK_DIM+1)])*np.array(self.state[(STOCK_DIM+1):(STOCK_DIM*2+1)]))
            self.asset_memory.append(end_total_asset)
            #print("end_total_asset:{}".format(end_total_asset))
            
            self.reward = end_total_asset - begin_total_asset            
            # print("step_reward:{}".format(self.reward))
            self.rewards_memory.append(self.reward)
            
            self.reward = self.reward*REWARD_SCALING



        return self.state, self.reward, self.terminal, {}

    def reset(self):
        self.asset_memory = [INITIAL_ACCOUNT_BALANCE]
        self.day = 0
        self.data = self.df.loc[self.day,:]
        self.cost = 0
        self.trades = 0
        self.terminal = False 
        self.rewards_memory = []
        #initiate state
        self.state = [INITIAL_ACCOUNT_BALANCE] + \
                      self.data.adjcp.values.tolist() + \
                      [0]*STOCK_DIM + \
                      self.data.macd.values.tolist() + \
                      self.data.rsi.values.tolist() + \
                      self.data.cci.values.tolist() + \
                      self.data.adx.values.tolist() 
        # iteration += 1 
        return self.state
    
    def render(self, mode='human'):
        return self.state

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

In [40]:
#env stock validation

class StockEnvValidation(gym.Env):
    """A stock trading environment for OpenAI gym"""
    metadata = {'render.modes': ['human']}

    def __init__(self, df, day = 0, turbulence_threshold=140, iteration=''):
        #super(StockEnv, self).__init__()
        #money = 10 , scope = 1
        self.day = day
        self.df = df
        # action_space normalization and shape is STOCK_DIM
        self.action_space = spaces.Box(low = -1, high = 1,shape = (STOCK_DIM,)) 
        # Shape = 181: [Current Balance]+[prices 1-30]+[owned shares 1-30] 
        # +[macd 1-30]+ [rsi 1-30] + [cci 1-30] + [adx 1-30]
        self.observation_space = spaces.Box(low=0, high=np.inf, shape = (181,))
        # load data from a pandas dataframe
        self.data = self.df.loc[self.day,:]
        self.terminal = False     
        self.turbulence_threshold = turbulence_threshold
        # initalize state
        self.state = [INITIAL_ACCOUNT_BALANCE] + \
                      self.data.adjcp.values.tolist() + \
                      [0]*STOCK_DIM + \
                      self.data.macd.values.tolist() + \
                      self.data.rsi.values.tolist() + \
                      self.data.cci.values.tolist() + \
                      self.data.adx.values.tolist()
        # initialize reward
        self.reward = 0
        self.turbulence = 0
        self.cost = 0
        self.trades = 0
        # memorize all the total balance change
        self.asset_memory = [INITIAL_ACCOUNT_BALANCE]
        self.rewards_memory = []
        #self.reset()
        self._seed()
        
        self.iteration=iteration


    def _sell_stock(self, index, action):
        # perform sell action based on the sign of the action
        if self.turbulence<self.turbulence_threshold:
            if self.state[index+STOCK_DIM+1] > 0:
                #update balance
                self.state[0] += \
                self.state[index+1]*min(abs(action),self.state[index+STOCK_DIM+1]) * \
                 (1- TRANSACTION_FEE_PERCENT)
                
                self.state[index+STOCK_DIM+1] -= min(abs(action), self.state[index+STOCK_DIM+1])
                self.cost +=self.state[index+1]*min(abs(action),self.state[index+STOCK_DIM+1]) * \
                 TRANSACTION_FEE_PERCENT
                self.trades+=1
            else:
                pass
        else:
            # if turbulence goes over threshold, just clear out all positions 
            if self.state[index+STOCK_DIM+1] > 0:
                #update balance
                self.state[0] += self.state[index+1]*self.state[index+STOCK_DIM+1]* \
                              (1- TRANSACTION_FEE_PERCENT)
                self.state[index+STOCK_DIM+1] =0
                self.cost += self.state[index+1]*self.state[index+STOCK_DIM+1]* \
                              TRANSACTION_FEE_PERCENT
                self.trades+=1
            else:
                pass
    
    def _buy_stock(self, index, action):
        # perform buy action based on the sign of the action
        if self.turbulence< self.turbulence_threshold:
            available_amount = self.state[0] // self.state[index+1]
            # print('available_amount:{}'.format(available_amount))
            
            #update balance
            self.state[0] -= self.state[index+1]*min(available_amount, action)* \
                              (1+ TRANSACTION_FEE_PERCENT)

            self.state[index+STOCK_DIM+1] += min(available_amount, action)
            
            self.cost+=self.state[index+1]*min(available_amount, action)* \
                              TRANSACTION_FEE_PERCENT
            self.trades+=1
        else:
            # if turbulence goes over threshold, just stop buying
            pass
        
    def step(self, actions):
        # print(self.day)
        self.terminal = self.day >= len(self.df.index.unique())-1
        # print(actions)

        if self.terminal:
            plt.plot(self.asset_memory,'r')
            plt.savefig('/kaggle/working/account_value_validation_{}.png'.format(self.iteration))
            plt.close()
            df_total_value = pd.DataFrame(self.asset_memory)
            df_total_value.to_csv('/kaggle/working/account_value_validation_{}.csv'.format(self.iteration))
            end_total_asset = self.state[0]+ \
            sum(np.array(self.state[1:(STOCK_DIM+1)])*np.array(self.state[(STOCK_DIM+1):(STOCK_DIM*2+1)]))
            #print("previous_total_asset:{}".format(self.asset_memory[0]))           

            #print("end_total_asset:{}".format(end_total_asset))
            #print("total_reward:{}".format(self.state[0]+sum(np.array(self.state[1:(STOCK_DIM+1)])*np.array(self.state[(STOCK_DIM+1):61]))- self.asset_memory[0] ))
            #print("total_cost: ", self.cost)
            #print("total trades: ", self.trades)

            df_total_value.columns = ['account_value']
            df_total_value['daily_return']=df_total_value.pct_change(1)
            sharpe = (4**0.5)*df_total_value['daily_return'].mean()/ \
                  df_total_value['daily_return'].std()
            #print("Sharpe: ",sharpe)
            
            #df_rewards = pd.DataFrame(self.rewards_memory)
            #df_rewards.to_csv('/kaggle/working/account_rewards_trade_{}.csv'.format(self.iteration))
            
            # print('total asset: {}'.format(self.state[0]+ sum(np.array(self.state[1:29])*np.array(self.state[29:]))))
            #with open('obs.pkl', 'wb') as f:  
            #    pickle.dump(self.state, f)
            
            return self.state, self.reward, self.terminal,{}

        else:
            # print(np.array(self.state[1:29]))

            actions = actions * HMAX_NORMALIZE
            #actions = (actions.astype(int))
            if self.turbulence>=self.turbulence_threshold:
                actions=np.array([-HMAX_NORMALIZE]*STOCK_DIM)
            begin_total_asset = self.state[0]+ \
            sum(np.array(self.state[1:(STOCK_DIM+1)])*np.array(self.state[(STOCK_DIM+1):(STOCK_DIM*2+1)]))
            #print("begin_total_asset:{}".format(begin_total_asset))
            
            argsort_actions = np.argsort(actions)
            
            sell_index = argsort_actions[:np.where(actions < 0)[0].shape[0]]
            buy_index = argsort_actions[::-1][:np.where(actions > 0)[0].shape[0]]

            for index in sell_index:
                # print('take sell action'.format(actions[index]))
                self._sell_stock(index, actions[index])

            for index in buy_index:
                # print('take buy action: {}'.format(actions[index]))
                self._buy_stock(index, actions[index])

            self.day += 1
            self.data = self.df.loc[self.day,:]         
            self.turbulence = self.data['turbulence'].values[0]
            #print(self.turbulence)
            #load next state
            # print("stock_shares:{}".format(self.state[29:]))
            self.state =  [self.state[0]] + \
                    self.data.adjcp.values.tolist() + \
                    list(self.state[(STOCK_DIM+1):(STOCK_DIM*2+1)]) + \
                    self.data.macd.values.tolist() + \
                    self.data.rsi.values.tolist() + \
                    self.data.cci.values.tolist() + \
                    self.data.adx.values.tolist()
            
            end_total_asset = self.state[0]+ \
            sum(np.array(self.state[1:(STOCK_DIM+1)])*np.array(self.state[(STOCK_DIM+1):(STOCK_DIM*2+1)]))
            self.asset_memory.append(end_total_asset)
            #print("end_total_asset:{}".format(end_total_asset))
            
            self.reward = end_total_asset - begin_total_asset            
            # print("step_reward:{}".format(self.reward))
            self.rewards_memory.append(self.reward)
            
            self.reward = self.reward*REWARD_SCALING

        return self.state, self.reward, self.terminal, {}

    def reset(self):  
        self.asset_memory = [INITIAL_ACCOUNT_BALANCE]
        self.day = 0
        self.data = self.df.loc[self.day,:]
        self.turbulence = 0
        self.cost = 0
        self.trades = 0
        self.terminal = False 
        #self.iteration=self.iteration
        self.rewards_memory = []
        #initiate state
        self.state = [INITIAL_ACCOUNT_BALANCE] + \
                      self.data.adjcp.values.tolist() + \
                      [0]*STOCK_DIM + \
                      self.data.macd.values.tolist() + \
                      self.data.rsi.values.tolist()  + \
                      self.data.cci.values.tolist()  + \
                      self.data.adx.values.tolist() 
            
        return self.state
    
    def render(self, mode='human',close=False):
        return self.state
    

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

In [41]:
import os
import time
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Adjust this path based on the actual structure of your Kaggle datasets
os.chdir("/kaggle/input/trading/")

from env.EnvMultipleStock_train import StockEnvTrain
from env.EnvMultipleStock_validation import StockEnvValidation
from env.EnvMultipleStock_trade import StockEnvTrade

# Installing stable-baselines3 which is compatible with TensorFlow 2.x
!pip install stable-baselines3

# Importing stable-baselines3 components
from stable_baselines3 import PPO, A2C, DDPG, SAC, TD3
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise





In [20]:
path = '/kaggle/input/trading/trading.csv'
df = pd.read_csv(path)
df.head()

Unnamed: 0.1,Unnamed: 0,datadate,tic,adjcp,open,high,low,volume,macd,rsi,cci,adx,turbulence
0,0,20090102,AAPL,12.964286,12.268571,13.005714,12.165714,26641980.0,0.0,100.0,66.666667,100.0,0.0
1,1,20090102,AXP,19.33,18.57,19.52,18.4,10955620.0,0.0,100.0,66.666667,100.0,0.0
2,2,20090102,BA,45.25,42.8,45.56,42.78,7010171.0,0.0,100.0,66.666667,100.0,0.0
3,3,20090102,CAT,46.91,44.91,46.98,44.71,7116726.0,0.0,0.0,66.666667,100.0,0.0
4,4,20090102,CSCO,16.96,16.41,17.0,16.25,40977480.0,0.0,100.0,66.666667,100.0,0.0


In [21]:
rebalance_window = 63
validation_window = 63

In [22]:
unique_trade_date = df[(df.datadate > 20151001)&(df.datadate <= 20200707)].datadate.unique()
print(unique_trade_date)

[20151002 20151005 20151006 ... 20200702 20200706 20200707]


In [52]:
def train_A2C(env_train, model_name, timesteps=10000):
    start = time.time()
    model = A2C('MlpPolicy', env_train, verbose=0)
    model.learn(total_timesteps=timesteps)
    end = time.time()

    model.save(f"/kaggle/working/{model_name}")
    print(' - Training time (A2C): ', (end - start) / 60, ' minutes')
    return model

def train_ACER(env_train, model_name, timesteps=10000):
    start = time.time()
    model = ACER('MlpPolicy', env_train, verbose=0)
    model.learn(total_timesteps=timesteps)
    end = time.time()

    model.save(f"/kaggle/working/{model_name}")
    print(' - Training time (A2C): ', (end - start) / 60, ' minutes')
    return model

def train_DDPG(env_train, model_name, timesteps=10000):
    # add the noise objects for DDPG
    n_actions = env_train.action_space.shape[-1]
    param_noise = None
    action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), sigma=float(0.5) * np.ones(n_actions))

    start = time.time()
    model = DDPG('MlpPolicy', env_train,action_noise=action_noise)
    model.learn(total_timesteps=timesteps)
    end = time.time()

    model.save(f"/kaggle/working/{model_name}")
    print(' - Training time (DDPG): ', (end-start)/60,' minutes')
    return model

def train_PPO(env_train, model_name, timesteps=10000):
    start = time.time()
    model = PPO('MlpPolicy', env_train, ent_coef = 0.005, batch_size = 8)
    
    model.learn(total_timesteps=timesteps)
    end = time.time()

    model.save(f"/kaggle/working/{model_name}")
    print(' - Training time (PPO): ', (end - start) / 60, ' minutes')
    return model

def train_GAIL(env_train, model_name, timesteps=10000):
    start = time.time()
    # generate expert trajectories
    model = SAC('MLpPolicy', env_train, verbose=1)
    generate_expert_traj(model, 'expert_model_gail', n_timesteps=100, n_episodes=10)

    # Load dataset
    dataset = ExpertDataset(expert_path='expert_model_gail.npz', traj_limitation=10, verbose=1)
    model = GAIL('MLpPolicy', env_train, dataset, verbose=1)

    model.learn(total_timesteps=1000)
    end = time.time()

    model.save(f"/kaggle/working/{model_name}")
    print(' - Training time (PPO): ', (end - start) / 60, ' minutes')
    return model

In [42]:
def data_split(df,start,end):
    data = df[(df.datadate >= start) & (df.datadate < end)]
    data=data.sort_values(['datadate','tic'],ignore_index=True)
    data.index = data.datadate.factorize()[0]
    return data

def get_validation_sharpe(iteration):
    df_total_value = pd.read_csv('/kaggle/working/account_value_validation_{}.csv'.format(iteration), index_col=0)
    df_total_value.columns = ['account_value_train']
    df_total_value['daily_return'] = df_total_value.pct_change(1)
    sharpe = (4 ** 0.5) * df_total_value['daily_return'].mean() / \
             df_total_value['daily_return'].std()
    return sharpe

In [43]:
def DRL_prediction(df,
                   model,
                   name,
                   last_state,
                   iter_num,
                   unique_trade_date,
                   rebalance_window,
                   turbulence_threshold,
                   initial):

    trade_data = data_split(df, start=unique_trade_date[iter_num - rebalance_window], end=unique_trade_date[iter_num])
    env_trade = DummyVecEnv([lambda: StockEnvTrade(trade_data,
                                                   turbulence_threshold=turbulence_threshold,
                                                   initial=initial,
                                                   previous_state=last_state,
                                                   model_name=name,
                                                   iteration=iter_num)])
    obs_trade = env_trade.reset()

    for i in range(len(trade_data.index.unique())):
        action, _states = model.predict(obs_trade)
        obs_trade, rewards, dones, info = env_trade.step(action)
        if i == (len(trade_data.index.unique()) - 2):
            last_state = env_trade.render()
            
  # Check if last_state is None before trying to create DataFrame
    if last_state is None:
        print("No last state available.")
    else:
        # Create DataFrame from last_state if it is not None
        df_last_state = pd.DataFrame({'last_state': [last_state]})
        df_last_state.to_csv('/kaggle/working/last_state_{}_{}.csv'.format(name, i), index=False)

    return last_state

def DRL_validation(model, test_data, test_env, test_obs) -> None:
    for i in range(len(test_data.index.unique())):
        action, _states = model.predict(test_obs)
        test_obs, rewards, dones, info = test_env.step(action)

In [48]:
def run_ensemble_strategy(df, unique_trade_date, rebalance_window, validation_window) -> None:
    last_state_ensemble =last_state_ensemble = [INITIAL_ACCOUNT_BALANCE] + [1] * STOCK_DIM * 2
    ppo_sharpe_list = []
    ddpg_sharpe_list = []
    a2c_sharpe_list = []

    model_use = []

    insample_turbulence = df[(df.datadate<20151000) & (df.datadate>=20090000)]
    insample_turbulence = insample_turbulence.drop_duplicates(subset=['datadate'])
    insample_turbulence_threshold = np.quantile(insample_turbulence.turbulence.values, .90)

    start = time.time()
    for i in range(rebalance_window + validation_window, len(unique_trade_date), rebalance_window):
        if i - rebalance_window - validation_window == 0:
            # inital state
            initial = True
        else:
            # previous state
            initial = False

        # Tuning trubulence index based on historical data
        # Turbulence lookback window is one quarter
        end_date_index = df.index[df["datadate"] == unique_trade_date[i - rebalance_window - validation_window]].to_list()[-1]
        start_date_index = end_date_index - validation_window*30 + 1

        historical_turbulence = df.iloc[start_date_index:(end_date_index + 1), :]
        historical_turbulence = historical_turbulence.drop_duplicates(subset=['datadate'])
        historical_turbulence_mean = np.mean(historical_turbulence.turbulence.values)

        if historical_turbulence_mean > insample_turbulence_threshold:
            # if the mean of the historical data is greater than the 90% quantile of insample turbulence data
            # then we assume that the current market is volatile,
            turbulence_threshold = insample_turbulence_threshold
        else:
            # if the mean of the historical data is less than the 90% quantile of insample turbulence data
            # then we tune up the turbulence_threshold, meaning we lower the risk
            turbulence_threshold = np.quantile(insample_turbulence.turbulence.values, 1)
            
        print("-" * 50)
        print(" - Turbulence_threshold: ", turbulence_threshold)

        train = data_split(df, start=20090000, end=unique_trade_date[i - rebalance_window - validation_window])
        env_train = DummyVecEnv([lambda: StockEnvTrain(train)])

        ## validation env
        validation = data_split(df, start=unique_trade_date[i - rebalance_window - validation_window],
                                end=unique_trade_date[i - rebalance_window])
        env_val = DummyVecEnv([lambda: StockEnvValidation(validation,
                                                          turbulence_threshold=turbulence_threshold,
                                                          iteration=i)])
        obs_val = env_val.reset()
        
        print(" - Model training from: ", 20090000, "to ",
              unique_trade_date[i - rebalance_window - validation_window])
        print(" - A2C Training")
        model_a2c = train_A2C(env_train, model_name="A2C_30k_dow_{}".format(i), timesteps=300)
        print(" - A2C Validation from: ", unique_trade_date[i - rebalance_window - validation_window], "to ",
              unique_trade_date[i - rebalance_window])
        DRL_validation(model=model_a2c, test_data=validation, test_env=env_val, test_obs=obs_val)
        sharpe_a2c = get_validation_sharpe(i)
        print(" - A2C Sharpe Ratio: ", sharpe_a2c)

        print(" - PPO Training")
        model_ppo = train_PPO(env_train, model_name="PPO_100k_dow_{}".format(i), timesteps=100)
        print(" - PPO Validation from: ", unique_trade_date[i - rebalance_window - validation_window], "to ",
              unique_trade_date[i - rebalance_window])
        DRL_validation(model=model_ppo, test_data=validation, test_env=env_val, test_obs=obs_val)
        sharpe_ppo = get_validation_sharpe(i)
        print(" - PPO Sharpe Ratio: ", sharpe_ppo)

        print(" - DDPG Training")
        model_ddpg = train_DDPG(env_train, model_name="DDPG_10k_dow_{}".format(i), timesteps=100)
        print(" - DDPG Validation from: ", unique_trade_date[i - rebalance_window - validation_window], "to ",
              unique_trade_date[i - rebalance_window])
        
        DRL_validation(model=model_ddpg, test_data=validation, test_env=env_val, test_obs=obs_val)
        sharpe_ddpg = get_validation_sharpe(i)

        ppo_sharpe_list.append(sharpe_ppo)
        a2c_sharpe_list.append(sharpe_a2c)
        ddpg_sharpe_list.append(sharpe_ddpg)

        # Model Selection based on sharpe ratio
        if (sharpe_ppo >= sharpe_a2c) & (sharpe_ppo >= sharpe_ddpg):
            model_ensemble = model_ppo
            model_use.append('PPO')
        elif (sharpe_a2c > sharpe_ppo) & (sharpe_a2c > sharpe_ddpg):
            model_ensemble = model_a2c
            model_use.append('A2C')
        else:
            model_ensemble = model_ddpg
            model_use.append('DDPG')

        print(" - Trading from: ", unique_trade_date[i - rebalance_window], "to ", unique_trade_date[i])
        print("-" * 50)
        last_state_ensemble = DRL_prediction(df=df, model=model_ensemble, name="ensemble",
                                             last_state=last_state_ensemble, iter_num=i,
                                             unique_trade_date=unique_trade_date,
                                             rebalance_window=rebalance_window,
                                             turbulence_threshold=turbulence_threshold,
                                             initial=initial)
        
    end = time.time()
    print("Ensemble Strategy took: ", (end - start) / 60, " minutes")

In [53]:
    run_ensemble_strategy(df=df, 
                          unique_trade_date= unique_trade_date,
                          rebalance_window = rebalance_window,
                          validation_window=validation_window)

--------------------------------------------------
 - Turbulence_threshold:  171.09407156310158
 - Model training from:  20090000 to  20151002
 - A2C Training




 - Training time (A2C):  0.016562227408091226  minutes
 - A2C Validation from:  20151002 to  20160104
 - A2C Sharpe Ratio:  0.014675693824502357
 - PPO Training
 - Training time (PPO):  0.26415040890375774  minutes
 - PPO Validation from:  20151002 to  20160104
 - PPO Sharpe Ratio:  0.005863890642849013
 - DDPG Training
 - Training time (DDPG):  0.20538955132166545  minutes
 - DDPG Validation from:  20151002 to  20160104
 - Trading from:  20160104 to  20160405
--------------------------------------------------




previous_total_asset:1000000
end_total_asset:1047399.0622578731
total_reward:47399.06225787313
total_cost:  1105.8038126342635
total trades:  759
Sharpe:  0.14174830479219247
No last state available.
--------------------------------------------------
 - Turbulence_threshold:  96.08032158358223
 - Model training from:  20090000 to  20160104
 - A2C Training




 - Training time (A2C):  0.016786396503448486  minutes
 - A2C Validation from:  20160104 to  20160405
 - A2C Sharpe Ratio:  0.11936655879017097
 - PPO Training
 - Training time (PPO):  0.26499009927113854  minutes
 - PPO Validation from:  20160104 to  20160405
 - PPO Sharpe Ratio:  -0.016156380904851553
 - DDPG Training
 - Training time (DDPG):  0.21184477011362712  minutes
 - DDPG Validation from:  20160104 to  20160405
 - Trading from:  20160405 to  20160705
--------------------------------------------------




previous_total_asset:1000000
end_total_asset:993192.733603079
total_reward:-6807.266396921012
total_cost:  7737.999142269037
total trades:  1646
Sharpe:  -0.021693370533806006
No last state available.
--------------------------------------------------
 - Turbulence_threshold:  171.09407156310158
 - Model training from:  20090000 to  20160405
 - A2C Training




 - Training time (A2C):  0.01698166529337565  minutes
 - A2C Validation from:  20160405 to  20160705
 - A2C Sharpe Ratio:  0.04985763620574728
 - PPO Training
 - Training time (PPO):  0.26401337385177615  minutes
 - PPO Validation from:  20160405 to  20160705
 - PPO Sharpe Ratio:  0.04217814934610117
 - DDPG Training
 - Training time (DDPG):  0.22125804821650188  minutes
 - DDPG Validation from:  20160405 to  20160705
 - Trading from:  20160705 to  20161003
--------------------------------------------------




previous_total_asset:1000000
end_total_asset:989204.5249667298
total_reward:-10795.475033270195
total_cost:  7328.5287311768025
total trades:  1600
Sharpe:  -0.05641715499535489
No last state available.
--------------------------------------------------
 - Turbulence_threshold:  171.09407156310158
 - Model training from:  20090000 to  20160705
 - A2C Training




 - Training time (A2C):  0.016639522711435952  minutes
 - A2C Validation from:  20160705 to  20161003
 - A2C Sharpe Ratio:  -0.030766682291314768
 - PPO Training
 - Training time (PPO):  0.268160871664683  minutes
 - PPO Validation from:  20160705 to  20161003
 - PPO Sharpe Ratio:  -0.07850385622001146
 - DDPG Training
 - Training time (DDPG):  0.22980987230936686  minutes
 - DDPG Validation from:  20160705 to  20161003
 - Trading from:  20161003 to  20170103
--------------------------------------------------




previous_total_asset:1000000
end_total_asset:1060933.9665769811
total_reward:60933.966576981125
total_cost:  1018.591437698212
total trades:  930
Sharpe:  0.3826348899067964
No last state available.
--------------------------------------------------
 - Turbulence_threshold:  171.09407156310158
 - Model training from:  20090000 to  20161003
 - A2C Training




 - Training time (A2C):  0.017708202203114826  minutes
 - A2C Validation from:  20161003 to  20170103
 - A2C Sharpe Ratio:  0.49206532069714276
 - PPO Training
 - Training time (PPO):  0.2745556275049845  minutes
 - PPO Validation from:  20161003 to  20170103
 - PPO Sharpe Ratio:  0.3920177567274364
 - DDPG Training
 - Training time (DDPG):  0.23857700030008952  minutes
 - DDPG Validation from:  20161003 to  20170103
 - Trading from:  20170103 to  20170404
--------------------------------------------------




previous_total_asset:1000000
end_total_asset:1043543.3674400002
total_reward:43543.36744000018
total_cost:  998.9825599999999
total trades:  992
Sharpe:  0.3209867147086355
No last state available.
--------------------------------------------------
 - Turbulence_threshold:  96.08032158358223
 - Model training from:  20090000 to  20170103
 - A2C Training




 - Training time (A2C):  0.018127687772115073  minutes
 - A2C Validation from:  20170103 to  20170404
 - A2C Sharpe Ratio:  0.37525455431490295
 - PPO Training
 - Training time (PPO):  0.2691780845324198  minutes
 - PPO Validation from:  20170103 to  20170404
 - PPO Sharpe Ratio:  0.16311772825792625
 - DDPG Training
 - Training time (DDPG):  0.24799201091130574  minutes
 - DDPG Validation from:  20170103 to  20170404
 - Trading from:  20170404 to  20170705
--------------------------------------------------




previous_total_asset:1000000
end_total_asset:1008585.9262871658
total_reward:8585.926287165843
total_cost:  5838.555008672448
total trades:  1096
Sharpe:  0.09555348839989096
No last state available.
--------------------------------------------------
 - Turbulence_threshold:  171.09407156310158
 - Model training from:  20090000 to  20170404
 - A2C Training




 - Training time (A2C):  0.017054227987925212  minutes
 - A2C Validation from:  20170404 to  20170705
 - A2C Sharpe Ratio:  0.2362256083138897
 - PPO Training
 - Training time (PPO):  0.26678810914357504  minutes
 - PPO Validation from:  20170404 to  20170705
 - PPO Sharpe Ratio:  0.16634563641972533
 - DDPG Training
 - Training time (DDPG):  0.2563031196594238  minutes
 - DDPG Validation from:  20170404 to  20170705
 - Trading from:  20170705 to  20171003
--------------------------------------------------




previous_total_asset:1000000
end_total_asset:1050394.1765600007
total_reward:50394.17656000075
total_cost:  2024.9490800000003
total trades:  853
Sharpe:  0.32954298330373905
No last state available.
--------------------------------------------------
 - Turbulence_threshold:  171.09407156310158
 - Model training from:  20090000 to  20170705
 - A2C Training




 - Training time (A2C):  0.01775979995727539  minutes
 - A2C Validation from:  20170705 to  20171003
 - A2C Sharpe Ratio:  0.5188823131043487
 - PPO Training
 - Training time (PPO):  0.2661457379659017  minutes
 - PPO Validation from:  20170705 to  20171003
 - PPO Sharpe Ratio:  0.22596844088550488
 - DDPG Training
 - Training time (DDPG):  0.26235279242197673  minutes
 - DDPG Validation from:  20170705 to  20171003
 - Trading from:  20171003 to  20180103
--------------------------------------------------




previous_total_asset:1000000
end_total_asset:1078223.5323531546
total_reward:78223.53235315462
total_cost:  8207.673620126392
total trades:  1558
Sharpe:  0.5505479626327834
No last state available.
--------------------------------------------------
 - Turbulence_threshold:  96.08032158358223
 - Model training from:  20090000 to  20171003
 - A2C Training




 - Training time (A2C):  0.01681136687596639  minutes
 - A2C Validation from:  20171003 to  20180103
 - A2C Sharpe Ratio:  0.494659504059884
 - PPO Training
 - Training time (PPO):  0.2666251262029012  minutes
 - PPO Validation from:  20171003 to  20180103
 - PPO Sharpe Ratio:  0.29135959473852613
 - DDPG Training
 - Training time (DDPG):  0.268434743086497  minutes
 - DDPG Validation from:  20171003 to  20180103
 - Trading from:  20180103 to  20180405
--------------------------------------------------




previous_total_asset:1000000
end_total_asset:1004119.4234964085
total_reward:4119.423496408504
total_cost:  1953.1170879905446
total trades:  330
Sharpe:  0.047480212730167386
No last state available.
--------------------------------------------------
 - Turbulence_threshold:  96.08032158358223
 - Model training from:  20090000 to  20180103
 - A2C Training




 - Training time (A2C):  0.016847928365071613  minutes
 - A2C Validation from:  20180103 to  20180405
 - A2C Sharpe Ratio:  -0.04568454094870651
 - PPO Training
 - Training time (PPO):  0.2632371107737223  minutes
 - PPO Validation from:  20180103 to  20180405
 - PPO Sharpe Ratio:  0.06386903721034068
 - DDPG Training
 - Training time (DDPG):  0.2750851551691691  minutes
 - DDPG Validation from:  20180103 to  20180405
 - Trading from:  20180405 to  20180705
--------------------------------------------------




previous_total_asset:1000000
end_total_asset:977884.6837345366
total_reward:-22115.316265463363
total_cost:  6656.192976891095
total trades:  1103
Sharpe:  -0.16745257567232677
No last state available.
--------------------------------------------------
 - Turbulence_threshold:  96.08032158358223
 - Model training from:  20090000 to  20180405
 - A2C Training




 - Training time (A2C):  0.01720150311787923  minutes
 - A2C Validation from:  20180405 to  20180705
 - A2C Sharpe Ratio:  0.013354825952783233
 - PPO Training
 - Training time (PPO):  0.26643014351526895  minutes
 - PPO Validation from:  20180405 to  20180705
 - PPO Sharpe Ratio:  -0.3057763231171007
 - DDPG Training
 - Training time (DDPG):  0.2846088925997416  minutes
 - DDPG Validation from:  20180405 to  20180705
 - Trading from:  20180705 to  20181003
--------------------------------------------------




previous_total_asset:1000000
end_total_asset:1016826.5946764599
total_reward:16826.594676459907
total_cost:  6745.731421713878
total trades:  968
Sharpe:  0.1704225223115562
No last state available.
--------------------------------------------------
 - Turbulence_threshold:  96.08032158358223
 - Model training from:  20090000 to  20180705
 - A2C Training




 - Training time (A2C):  0.01664732297261556  minutes
 - A2C Validation from:  20180705 to  20181003
 - A2C Sharpe Ratio:  0.11792746112426343
 - PPO Training
 - Training time (PPO):  0.26692943970362343  minutes
 - PPO Validation from:  20180705 to  20181003
 - PPO Sharpe Ratio:  -0.016562824730088004
 - DDPG Training
 - Training time (DDPG):  0.2899098078409831  minutes
 - DDPG Validation from:  20180705 to  20181003
 - Trading from:  20181003 to  20190104
--------------------------------------------------




previous_total_asset:1000000
end_total_asset:1016830.874
total_reward:16830.873999999953
total_cost:  1100.038
total trades:  153
Sharpe:  0.3067368688156179
No last state available.
--------------------------------------------------
 - Turbulence_threshold:  171.09407156310158
 - Model training from:  20090000 to  20181003
 - A2C Training




 - Training time (A2C):  0.01717563470204671  minutes
 - A2C Validation from:  20181003 to  20190104
 - A2C Sharpe Ratio:  -0.37729731099296093
 - PPO Training
 - Training time (PPO):  0.2789430101712545  minutes
 - PPO Validation from:  20181003 to  20190104
 - PPO Sharpe Ratio:  -0.36906507741040967
 - DDPG Training
 - Training time (DDPG):  0.3011738657951355  minutes
 - DDPG Validation from:  20181003 to  20190104
 - Trading from:  20190104 to  20190405
--------------------------------------------------




previous_total_asset:1000000
end_total_asset:1053381.583016004
total_reward:53381.58301600395
total_cost:  10468.953345899643
total trades:  1662
Sharpe:  0.2659148530402895
No last state available.
--------------------------------------------------
 - Turbulence_threshold:  96.08032158358223
 - Model training from:  20090000 to  20190104
 - A2C Training




 - Training time (A2C):  0.01779555082321167  minutes
 - A2C Validation from:  20190104 to  20190405
 - A2C Sharpe Ratio:  0.028668940487889732
 - PPO Training
 - Training time (PPO):  0.2710132797559102  minutes
 - PPO Validation from:  20190104 to  20190405
 - PPO Sharpe Ratio:  0.06556549705552368
 - DDPG Training
 - Training time (DDPG):  0.31209014654159545  minutes
 - DDPG Validation from:  20190104 to  20190405
 - Trading from:  20190405 to  20190708
--------------------------------------------------




previous_total_asset:1000000
end_total_asset:1005611.5056859982
total_reward:5611.505685998243
total_cost:  952.2232200565021
total trades:  158
Sharpe:  0.45618537954774413
No last state available.
--------------------------------------------------
 - Turbulence_threshold:  96.08032158358223
 - Model training from:  20090000 to  20190405
 - A2C Training




 - Training time (A2C):  0.0174986998240153  minutes
 - A2C Validation from:  20190405 to  20190708
 - A2C Sharpe Ratio:  0.15727062670980968
 - PPO Training
 - Training time (PPO):  0.2685620824495951  minutes
 - PPO Validation from:  20190405 to  20190708
 - PPO Sharpe Ratio:  0.1476274876720468
 - DDPG Training
 - Training time (DDPG):  0.3226025660832723  minutes
 - DDPG Validation from:  20190405 to  20190708
 - Trading from:  20190708 to  20191004
--------------------------------------------------




previous_total_asset:1000000
end_total_asset:999368.1992300005
total_reward:-631.8007699995069
total_cost:  2535.263799999999
total trades:  256
Sharpe:  -0.01251995641318924
No last state available.
--------------------------------------------------
 - Turbulence_threshold:  96.08032158358223
 - Model training from:  20090000 to  20190708
 - A2C Training




 - Training time (A2C):  0.01816927989323934  minutes
 - A2C Validation from:  20190708 to  20191004
 - A2C Sharpe Ratio:  -0.162873128640501
 - PPO Training
 - Training time (PPO):  0.27118192513783773  minutes
 - PPO Validation from:  20190708 to  20191004
 - PPO Sharpe Ratio:  -0.04868483389758255
 - DDPG Training
 - Training time (DDPG):  0.32786349058151243  minutes
 - DDPG Validation from:  20190708 to  20191004
 - Trading from:  20191004 to  20200106
--------------------------------------------------




previous_total_asset:1000000
end_total_asset:997969.8529999998
total_reward:-2030.14700000023
total_cost:  579.0099999999999
total trades:  80
Sharpe:  -0.34853158641160187
No last state available.
--------------------------------------------------
 - Turbulence_threshold:  96.08032158358223
 - Model training from:  20090000 to  20191004
 - A2C Training




 - Training time (A2C):  0.01824939250946045  minutes
 - A2C Validation from:  20191004 to  20200106
 - A2C Sharpe Ratio:  -0.11239309739471691
 - PPO Training
 - Training time (PPO):  0.2716402570406596  minutes
 - PPO Validation from:  20191004 to  20200106
 - PPO Sharpe Ratio:  -0.0970248816478046
 - DDPG Training
 - Training time (DDPG):  0.3332372665405273  minutes
 - DDPG Validation from:  20191004 to  20200106
 - Trading from:  20200106 to  20200406
--------------------------------------------------




previous_total_asset:1000000
end_total_asset:985993.1007988178
total_reward:-14006.899201182183
total_cost:  660.7553013914203
total trades:  163
Sharpe:  -0.46825692604054464
No last state available.
--------------------------------------------------
 - Turbulence_threshold:  96.08032158358223
 - Model training from:  20090000 to  20200106
 - A2C Training




 - Training time (A2C):  0.018073054154713948  minutes
 - A2C Validation from:  20200106 to  20200406
 - A2C Sharpe Ratio:  -0.44265617330638124
 - PPO Training
 - Training time (PPO):  0.27430207331975304  minutes
 - PPO Validation from:  20200106 to  20200406
 - PPO Sharpe Ratio:  -0.4334503489608362
 - DDPG Training
 - Training time (DDPG):  0.33943023284276325  minutes
 - DDPG Validation from:  20200106 to  20200406
 - Trading from:  20200406 to  20200707
--------------------------------------------------




previous_total_asset:1000000
end_total_asset:1005620.8340000004
total_reward:5620.834000000381
total_cost:  672.097
total trades:  63
Sharpe:  0.27361679616776785
No last state available.
Ensemble Strategy took:  10.422946627934774  minutes
