In [22]:
from abc import ABC, abstractmethod
from stable_baselines3 import A2C
from stable_baselines3.common.vec_env import DummyVecEnv
from gym import spaces
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

  and should_run_async(code)


In [30]:
class Environment:
    """Pairs a dataset with environment parameters and builds the vectorised env.

    Attributes
    ----------
    data :
        The object passed as ``data``; forwarded as the first positional
        argument of ``PortfolioEnvironmentClass``.
    params : dict
        Keyword arguments forwarded to ``PortfolioEnvironmentClass``.
    portfilio_env : PortfolioEnvironmentClass
        The single underlying environment instance (attribute spelling kept
        unchanged because it is part of the public surface).
    """

    def __init__(self, data, params):
        """Store the inputs and create the underlying portfolio environment."""
        self.data, self.params = data, params
        self.portfilio_env = PortfolioEnvironmentClass(data, **params)

    def environment(self):
        """Wrap the portfolio environment in a ``DummyVecEnv`` and reset it.

        Returns
        -------
        tuple
            ``(vec_env, initial_obs)`` where ``initial_obs`` is the value
            returned by ``vec_env.reset()``.
        """

        def _provide_env():
            # Always hand out the same instance created in __init__.
            return self.portfilio_env

        vec_env = DummyVecEnv([_provide_env])
        initial_obs = vec_env.reset()
        return vec_env, initial_obs

  and should_run_async(code)


In [19]:
class PortfolioEnvironmentClass(Environment):
    """Portfolio-allocation environment exposing the Gym ``reset``/``step`` API.

    On every step the agent emits one raw score per asset. The scores are
    converted into portfolio weights with a softmax, applied to the
    close-to-close price change between the current and the next day, and the
    resulting portfolio value is returned as the reward.

    The observation of a day is the first ``cov_list`` entry of that day
    stacked on top of one row per technical indicator, i.e. an array of shape
    ``(state_space + len(tech_indicator_list), state_space)``.

    NOTE(review): the base class is kept for backward compatibility, but
    ``Environment.__init__`` is deliberately not called here: it instantiates
    this very class and would recurse. The inherited ``environment()`` helper
    is therefore not usable on instances of this class.

    Parameters
    ----------
    df : pandas.DataFrame
        Market data. ``df.loc[day, :]`` must return the rows of one trading
        day and provide the columns ``cov_list``, ``close``, ``date`` and
        every name listed in ``tech_indicator_list``.
    stock_dim : int
        Number of assets; used for the initial equal-weight allocation.
    hmax, transaction_cost_pct, reward_scaling, turbulence_threshold, lookback :
        Stored on the instance; not used by the code in this class.
    initial_amount : float
        Starting portfolio value.
    state_space : int
        Second dimension of the observation (and size of the covariance part).
    action_space : int
        Number of action components (one per asset).
    tech_indicator_list : list of str
        Column names appended to the observation.
    day : int, default 0
        Index label of the first trading day; ``reset()`` returns to it.
    """

    # NOTE(review): vectorised wrappers such as stable-baselines3's
    # DummyVecEnv are expected to read ``metadata`` from the wrapped env;
    # confirm against the installed version.
    metadata = {"render.modes": ["human"]}

    def __init__(self,
                df,
                stock_dim,
                hmax,
                initial_amount,
                transaction_cost_pct,
                reward_scaling,
                state_space,
                action_space,
                tech_indicator_list,
                turbulence_threshold=None,
                lookback=252,
                day = 0):

        self._start_day = day
        self.day = day
        self.lookback = lookback
        self.df = df
        self.stock_dim = stock_dim
        self.hmax = hmax
        self.initial_amount = initial_amount
        self.transaction_cost_pct = transaction_cost_pct
        self.reward_scaling = reward_scaling
        self.state_space = state_space
        self.tech_indicator_list = tech_indicator_list
        self.turbulence_threshold = turbulence_threshold

        # One action component per asset, each in [0, 1]; step() normalises
        # them into weights with a softmax.
        self.action_space = spaces.Box(low=0, high=1, shape=(action_space,))
        self.observation_space = spaces.Box(
            low=-np.inf,
            high=np.inf,
            shape=(self.state_space + len(self.tech_indicator_list), self.state_space),
        )

        # Defined up-front so the terminal branch of step() can always
        # return it, even if the very first step is already terminal.
        self.reward = 0

        self._load_day()
        self._reset_memory()

    def _load_day(self):
        """Load the rows of ``self.day`` and rebuild the observation."""
        self.data = self.df.loc[self.day, :]
        self.covs = self.data['cov_list'].values[0]
        indicator_rows = [self.data[tech].values.tolist() for tech in self.tech_indicator_list]
        self.state = np.append(np.array(self.covs), indicator_rows, axis=0)

    def _reset_memory(self):
        """Reset portfolio value, terminal flag and the per-step histories."""
        self.terminal = False
        self.portfolio_value = self.initial_amount
        # portfolio value after each step
        self.asset_memory = [self.initial_amount]
        # portfolio return of each step
        self.portfolio_return_memory = [0]
        # start from an equal-weight allocation
        self.actions_memory = [[1 / self.stock_dim] * self.stock_dim]
        self.date_memory = [self.data.date.unique()[0]]

    def reset(self):
        """Return to the first trading day, clear all histories, return the state.

        Required because ``Environment.environment()`` calls
        ``DummyVecEnv.reset()``, which delegates to this method.
        """
        self.day = self._start_day
        self.reward = 0
        self._load_day()
        self._reset_memory()
        return self.state

    def step(self, actions):
        """Advance one trading day using ``actions`` as raw asset scores.

        Returns
        -------
        tuple
            ``(state, reward, terminal, info)``. Once the last day has been
            reached, summary plots are written to ``results/``, a summary is
            printed, and the previous state and reward are returned with
            ``terminal`` set to ``True``.
        """
        self.terminal = self.day >= len(self.df.index.unique())-1

        if self.terminal:
            self._report_episode()
            return self.state, self.reward, self.terminal, {}

        weights = self.softmax_normalization(actions)
        self.actions_memory.append(weights)
        previous_close = self.data.close.values

        # load next state
        self.day += 1
        self._load_day()

        # Weighted simple return between the previous and the new close.
        portfolio_return = sum(((self.data.close.values / previous_close) - 1) * weights)
        new_portfolio_value = self.portfolio_value * (1 + portfolio_return)
        self.portfolio_value = new_portfolio_value

        # save into memory
        self.portfolio_return_memory.append(portfolio_return)
        self.date_memory.append(self.data.date.unique()[0])
        self.asset_memory.append(new_portfolio_value)

        # The reward is the new portfolio value.
        # NOTE(review): ``reward_scaling`` is not applied here — confirm
        # whether that is intended.
        self.reward = new_portfolio_value

        return self.state, self.reward, self.terminal, {}

    def _report_episode(self):
        """Write the end-of-episode plots and print the summary statistics."""
        import os  # local import: only needed for the output directory

        # savefig does not create missing directories.
        os.makedirs('results', exist_ok=True)

        df_daily_return = pd.DataFrame(self.portfolio_return_memory)
        df_daily_return.columns = ['daily_return']

        plt.plot(df_daily_return.daily_return.cumsum(), 'r')
        plt.savefig('results/cumulative_reward.png')
        plt.close()

        plt.plot(self.portfolio_return_memory, 'r')
        plt.savefig('results/rewards.png')
        plt.close()

        print("=================================")
        print("begin_total_asset:{}".format(self.asset_memory[0]))
        print("end_total_asset:{}".format(self.portfolio_value))

        daily_std = df_daily_return['daily_return'].std()
        if daily_std != 0:
            # 252**0.5 scales the per-step ratio by the square root of 252.
            sharpe = (252**0.5) * df_daily_return['daily_return'].mean() / daily_std
            print("Sharpe: ", sharpe)
        print("=================================")

    def render(self, mode='human'):
        """Return the current observation."""
        return self.state

    def softmax_normalization(self, actions):
        """Map raw action scores to positive weights that sum to one.

        The maximum is subtracted before exponentiation; this leaves the
        result mathematically unchanged but prevents overflow in ``np.exp``.
        """
        scores = np.asarray(actions, dtype=float)
        exponentials = np.exp(scores - np.max(scores))
        return exponentials / np.sum(exponentials)

    def save_asset_memory(self):
        """Return the per-step dates and portfolio returns as a DataFrame.

        Called through ``env_method`` by ``A2C.predict``.
        """
        return pd.DataFrame({
            'date': self.date_memory,
            'daily_return': self.portfolio_return_memory,
        })

    def save_action_memory(self):
        """Return the per-step portfolio weights as a DataFrame indexed by date.

        Called through ``env_method`` by ``A2C.predict``.
        """
        df_actions = pd.DataFrame(self.actions_memory)
        # Label the columns with ticker symbols when the data carries a
        # ``tic`` column of matching length (assumed FinRL-style layout —
        # TODO confirm); otherwise keep the default integer labels.
        if 'tic' in self.data and len(self.data['tic'].values) == df_actions.shape[1]:
            df_actions.columns = self.data['tic'].values
        df_actions.index = pd.Index(self.date_memory, name='date')
        return df_actions

In [9]:
class Agent(ABC):
    """Abstract interface every trading agent in this notebook implements.

    The signatures mirror the concrete ``A2C`` wrapper defined further down:
    each method is an instance method taking a single argument.
    """

    @abstractmethod
    def train_model(self, train_params):
        """Train the underlying model using the settings in ``train_params``."""
        pass

    @abstractmethod
    def predict(self, test_params):
        """Run the model using the settings in ``test_params``."""
        pass

    @abstractmethod
    def save_model(self, model_name):
        """Persist the model under ``model_name``."""
        pass

    @abstractmethod
    def load_model(self, model_name):
        """Load the model stored under ``model_name``."""
        pass

In [10]:
class ConventionalAgent(Agent, ABC):
    """Abstract base for non-RL agents.

    Restates the four ``Agent`` operations and adds two further abstract
    hooks, ``_return_predict`` and ``_weight_optimization``. All methods are
    declared as instance methods so that implementations can chain to them
    with ``super()``.
    """

    @abstractmethod
    def train_model(self, train_params):
        """Train the underlying model using the settings in ``train_params``."""
        pass

    @abstractmethod
    def predict(self, test_params):
        """Run the model using the settings in ``test_params``."""
        pass

    @abstractmethod
    def save_model(self, model_name):
        """Persist the model under ``model_name``."""
        pass

    @abstractmethod
    def load_model(self, model_name):
        """Load the model stored under ``model_name``."""
        pass

    @abstractmethod
    def _return_predict(self, *args, **kwargs):
        """Return-prediction hook; arguments are left to the implementation."""
        pass

    @abstractmethod
    def _weight_optimization(self, *args, **kwargs):
        """Weight-optimisation hook; arguments are left to the implementation."""
        pass

In [11]:
class RLAgent(Agent, ABC):
    """Abstract base for reinforcement-learning agents.

    Restates the four ``Agent`` operations as instance methods so that
    implementations such as ``A2C`` can chain to them with ``super()``.
    """

    @abstractmethod
    def train_model(self, train_params):
        """Train the underlying model using the settings in ``train_params``."""
        pass

    @abstractmethod
    def predict(self, test_params):
        """Run the model using the settings in ``test_params``."""
        pass

    @abstractmethod
    def save_model(self, model_name):
        """Persist the model under ``model_name``."""
        pass

    @abstractmethod
    def load_model(self, model_name):
        """Load the model stored under ``model_name``."""
        pass

In [12]:
class A2C(RLAgent):
    """``RLAgent`` wrapper around the stable-baselines3 A2C algorithm.

    Parameters
    ----------
    model_params : dict
        ``"policy"``      – policy identifier passed to stable-baselines3.
        ``"environment"`` – either an env exposing ``step`` or an
                            ``Environment`` wrapper, which is unwrapped via
                            its ``environment()`` method.
        ``"verbose"``     – optional verbosity level (defaults to 0).
    """

    def __init__(self, model_params):
        # This class is itself called ``A2C`` and therefore shadows the
        # module-level ``from stable_baselines3 import A2C``; calling
        # ``A2C(...)`` here would recurse into this wrapper. Import the
        # library class locally under a distinct alias instead.
        from stable_baselines3 import A2C as SB3A2C

        self.model_params = model_params

        env = model_params["environment"]
        # Accept the notebook's ``Environment`` wrapper (no ``step`` method)
        # by asking it for the vectorised env it builds.
        if not hasattr(env, "step") and hasattr(env, "environment"):
            env, _ = env.environment()

        # ``verbose`` must be passed by keyword: the third positional
        # parameter of the stable-baselines3 constructor is the learning
        # rate, not the verbosity.
        self.model = SB3A2C(
            model_params["policy"],
            env,
            verbose=model_params.get("verbose", 0),
        )
        # TODO: further hyper-parameters from ``model_params`` are not
        # forwarded yet.

    def train_model(self, train_params):
        """Train for ``train_params["total_timesteps"]`` steps and return the model.

        ``train_params["log_interval"]`` is forwarded to ``learn``.
        """
        self.model = self.model.learn(
            total_timesteps=train_params["total_timesteps"],
            log_interval=train_params["log_interval"],
        )
        return self.model

    def predict(self, test_params):
        """Roll the model through the test environment and collect its memories.

        Parameters
        ----------
        test_params : dict
            ``"environment"``   – wrapper exposing ``environment()`` and the
                                  underlying frame as ``df`` or ``data``.
            ``"deterministic"`` – forwarded to the model's ``predict``.

        Returns
        -------
        tuple
            ``(account_memory, actions_memory)`` as produced by the env's
            ``save_asset_memory`` / ``save_action_memory`` methods.

        Raises
        ------
        RuntimeError
            If the episode was too short for the memories to be captured.
        """
        env_wrapper = test_params["environment"]
        # ``environment()`` already resets the env and returns the first
        # observation, so no second reset is needed.
        test_env, test_obs = env_wrapper.environment()

        # The ``Environment`` wrapper stores its frame as ``data``; objects
        # exposing ``df`` keep working as before.
        frame = env_wrapper.df if hasattr(env_wrapper, "df") else env_wrapper.data
        n_days = len(frame.index.unique())

        account_memory = []
        actions_memory = []

        for i in range(n_days):
            action, _states = self.model.predict(test_obs, deterministic=test_params["deterministic"])
            test_obs, rewards, dones, info = test_env.step(action)
            # Capture the histories on the second-to-last iteration, before
            # the episode is reported as done.
            if i == n_days - 2:
                account_memory = test_env.env_method(method_name="save_asset_memory")
                actions_memory = test_env.env_method(method_name="save_action_memory")
            if dones[0]:
                print("hit end!")
                break

        if not account_memory or not actions_memory:
            raise RuntimeError(
                "predict() could not capture the environment memories; "
                "the test data needs at least two distinct index values"
            )

        return account_memory[0], actions_memory[0]

    def save_model(self, model_name):
        """Save the underlying model under ``model_name``."""
        self.model.save(model_name)

    def load_model(self, model_name):
        """Replace the underlying model with the one stored under ``model_name``."""
        self.model = self.model.load(model_name)
        return self.model

    

In [20]:
# Technical-indicator columns appended to every observation.
TECHNICAL_INDICATORS_LIST = [
    "macd",
    "boll_ub", "boll_lb",
    "rsi_30", "cci_30", "dx_30",
    "close_30_sma", "close_60_sma",
]

# Keyword arguments forwarded to PortfolioEnvironmentClass.
env_kwargs = dict(
    hmax=100,                                       # maximum number of shares to trade
    initial_amount=1_000_000,                       # initial cash
    transaction_cost_pct=0.001,                     # transaction cost percentage
    state_space=29,                                 # number of unique stocks
    stock_dim=29,                                   # number of unique stocks
    tech_indicator_list=TECHNICAL_INDICATORS_LIST,  # technical indicators
    action_space=29,                                # number of stocks in training data
    reward_scaling=0.1,                             # hyperparameter
)

In [None]:
# Training environment.
# TODO: `train` is meant to be the training data (a DataFrame); it has not been supplied yet.
e_train_gym = Environment(data = train, params = env_kwargs)
env_train, _ = e_train_gym.environment()

# Trading (test) environment.
# TODO: `trade` is meant to be the test data (a DataFrame); it has not been supplied yet.
e_trade_gym = Environment(data = trade, params = env_kwargs)
# Build the vectorised env from the trade wrapper; the original line rebuilt the training env a second time.
env_trade, _ = e_trade_gym.environment()

In [None]:
# Model construction settings. stable-baselines3 needs an actual environment,
# so pass the vectorised env rather than the `Environment` wrapper object.
a2c_params = {"policy" : "MlpPolicy",
                "environment" : env_train,
                "verbose" : 1}

# Settings forwarded to `learn` by `train_model`.
train_params = {"total_timesteps": 25000,
                "log_interval": 100}

# Evaluation settings. `predict` calls `.environment()` on this object, so the
# wrapper is passed here; evaluate on the trade data, not on the data the
# model was trained on.
test_params = {"environment" : e_trade_gym,
               "deterministic" : True}

In [None]:
# Instantiate the A2C agent wrapper from its configuration dictionary.
a2c = A2C(model_params=a2c_params)

In [None]:
# Train the agent with the settings from `train_params`.
a2c.train_model(train_params=train_params)

In [None]:
# Run the agent with the settings from `test_params`; the returned value is the cell's last expression.
a2c.predict(test_params=test_params)

In [None]:
# Persist the model under the name "a2c_model".
a2c.save_model(model_name="a2c_model")

In [None]:
# Load the model saved as "a2c_model" back into the agent and keep a reference to it.
loaded_a2c_model = a2c.load_model(model_name="a2c_model")