In [1]:
from abc import ABC, abstractmethod
from stable_baselines3 import A2C
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3 import A2C as sb_A2C
from gym import spaces
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import yaml
import gym
from datetime import datetime
from sys import path
from os.path import dirname as dir

path.append(dir(path[0]))
print(path)
#__package__ = "examples"

from FinancialEnvLayer.datacollector import CustomDatasetImporter
from FinancialEnvLayer.datacollector import DataDownloader
from FinancialEnvLayer.dataprocessor import FeatureEngineer

['/Users/doganparlak/Desktop/Master_2.2/Master_Project/uniFi_github/uniFi/AgentLayer', '/usr/local/Cellar/python@3.8/3.8.8_1/Frameworks/Python.framework/Versions/3.8/lib/python38.zip', '/usr/local/Cellar/python@3.8/3.8.8_1/Frameworks/Python.framework/Versions/3.8/lib/python3.8', '/usr/local/Cellar/python@3.8/3.8.8_1/Frameworks/Python.framework/Versions/3.8/lib/python3.8/lib-dynload', '', '/usr/local/lib/python3.8/site-packages', '/usr/local/lib/python3.8/site-packages/selenium-3.141.0-py3.8.egg', '/usr/local/lib/python3.8/site-packages/urllib3-1.26.4-py3.8.egg', '/Users/doganparlak/Desktop/Master_2.2/Master_Project/uniFi_github/uniFi']


In [2]:
class Environment(gym.Env, ABC):

    @abstractmethod
    def reset(self):
        pass

    @abstractmethod
    def step(self, action):
        pass

    @abstractmethod
    def render(self, mode="human"):
        pass

    @abstractmethod
    def get_env(self):
        pass

    @staticmethod
    def softmax_normalization(actions):
        numerator = np.exp(actions)
        denominator = np.sum(np.exp(actions))
        softmax_output = numerator / denominator
        return softmax_output


In [3]:
class Agent(ABC):

    @abstractmethod
    def train_model():
        pass

    @abstractmethod
    def predict():
        pass

    @abstractmethod
    def save_model():
        pass
    
    @abstractmethod
    def load_model():
        pass

In [4]:
class ConventionalAgent(Agent, ABC):

    @abstractmethod
    def train_model():
        pass

    @abstractmethod
    def predict():
        pass

    @abstractmethod
    def save_model():
        pass

    @abstractmethod
    def load_model():
        pass
    
    @abstractmethod
    def _return_predict():
        pass

    @abstractmethod
    def _weight_optimization():
        pass

In [5]:
class RLAgent(Agent, ABC):

    @abstractmethod
    def train_model():
        pass

    @abstractmethod
    def predict():
        
        pass

    @abstractmethod
    def save_model():
        pass

    @abstractmethod
    def load_model():
        pass

In [54]:
class PortfolioEnv(Environment):

    def __init__(self,
                 df: pd.DataFrame,  # input data
                 stock_dim: int,  # number of unique securities in the investment universe
                 hmax: float,  # maximum number of shares to trade
                 initial_amount: float,  # initial cash value
                 transaction_cost_pct: float,  # transaction cost percentage per trade
                 reward_scaling: float,  # scaling factor for reward as training progresses
                 state_space: int,  # the dimension of input features (state space)
                 action_space: int,  # number of actions, which is equal to portfolio dimension
                 tech_indicator_list: list,  # a list of technical indicator names
                 turbulence_threshold=None,  # a threshold to control risk aversion
                 lookback=252,  #
                 day=0):  # an increment number to control date

        self.df = df
        self.day = day
        self.lookback = lookback
        self.stock_dim = stock_dim
        self.hmax = hmax
        self.initial_amount = initial_amount
        self.transaction_cost_pct = transaction_cost_pct
        self.reward_scaling = reward_scaling
        self.state_space = state_space
        self.action_space = action_space
        self.tech_indicator_list = tech_indicator_list

        # action_space normalization and shape is self.stock_dim
        self.action_space = spaces.Box(low=0, high=1, shape=(self.action_space,))
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf,
                                            shape=(self.state_space + len(self.tech_indicator_list), self.state_space))
        

        # load data from a pandas dataframe
        
        ##FINRL APPROACH
        #self.df.set_index("date", drop = True, inplace=True)
        
        self.data = self.df.loc[self.day,:]
        self.covs = self.data['cov_list'].values[0]
        self.state =  np.append(np.array(self.covs), [self.data[tech].values.tolist() for tech in self.tech_indicator_list ], axis=0)

        """
        self.state = np.zeros((len(self.tech_indicator_list) + self.stock_dim, self.stock_dim))
        for i in range(self.stock_dim):
            self.data = self.df.loc[self.day, :] 
            self.covs = self.data['cov_list'][i]
            temp_stock_state = np.append(np.array(self.covs), [self.data[tech] for tech in self.tech_indicator_list], axis=0)
            self.state[:,i] = temp_stock_state
            self.day = self.day + 1 #each row represents a single stock, in total to skip a day we should increment this value by #stocks amount
            #print(temp_stock_state)
        print(self.state)
        """

        self.terminal = False
        #self.turbulence_threshold = turbulence_threshold
        # initalize state: initial portfolio return + individual stock return + individual weights
        self.portfolio_value = self.initial_amount

        # memorize portfolio value each step
        self.asset_memory = [self.initial_amount]
        # memorize portfolio return each step
        self.portfolio_return_memory = [0]
        self.actions_memory = [[1 / self.stock_dim] * self.stock_dim]
        self.date_memory = [self.data["date"]]

    def reset(self):
        print("----RESET---")
        self.asset_memory = [self.initial_amount]
        self.day = 0

        """"
        FINRL APPROACH
        self.data = self.df.loc[self.day,:]
        # load states
        self.covs = self.data['cov_list'][0]
        self.state =  np.append(np.array(self.covs), [self.data[tech] for tech in self.tech_indicator_list], axis=0)
        """
        
        self.state = np.zeros((len(self.tech_indicator_list) + self.stock_dim, self.stock_dim))
        for i in range(self.stock_dim):
            self.data = self.df.loc[self.day, :] 
            self.covs = self.data['cov_list'][i]
            temp_stock_state = np.append(np.array(self.covs), [self.data[tech] for tech in self.tech_indicator_list], axis=0)
            self.state[:,i] = temp_stock_state
            self.day = self.day + 1 #each row represents a single stock, in total to skip a day we should increment this value by #stocks amount
            #print(temp_stock_state)
        print(self.state.shape)
        self.portfolio_value = self.initial_amount
        #self.cost = 0
        #self.trades = 0
        self.terminal = False 
        self.portfolio_return_memory = [0]
        self.actions_memory=[[1/self.stock_dim] * self.stock_dim]
        self.date_memory=[self.data["date"]] 
        return self.state

    def step(self, actions):
        print("----STEP---")
        self.terminal = self.day >= len(self.df.index.unique()) - 1
        if self.terminal:
            df = pd.DataFrame(self.portfolio_return_memory)
            df.columns = ['daily_return']
            plt.plot(df.daily_return.cumsum(), 'r')
            plt.savefig('results/cumulative_reward.png')
            plt.close()

            plt.plot(self.portfolio_return_memory, 'r')
            plt.savefig('results/rewards.png')
            plt.close()

            print("=================================")
            print("begin_total_asset:{}".format(self.asset_memory[0]))
            print("end_total_asset:{}".format(self.portfolio_value))

            df_daily_return = pd.DataFrame(self.portfolio_return_memory)
            df_daily_return.columns = ['daily_return']
            if df_daily_return['daily_return'].std() != 0:
                sharpe = (252 ** 0.5) * df_daily_return['daily_return'].mean() / \
                         df_daily_return['daily_return'].std()
                print("Sharpe: ", sharpe)
            print("=================================")

            return self.state, self.reward, self.terminal, {}

        else:
            weights = Environment.softmax_normalization(actions)
            self.actions_memory.append(weights)
            last_day_memory = self.data

            # load next state
            """"
            FINRL APPROACH
            """
            self.day += 1
            self.data = self.df.loc[self.day, :]
            print(self.data["tic"])
            self.covs = self.data['cov_list'][0]
            self.state = np.append(np.array(self.covs), [self.data[tech] for tech in self.tech_indicator_list],
                                   axis=0)
            portfolio_return = sum(((self.data.close.values / last_day_memory.close.values) - 1) * weights)
            log_portfolio_return = np.log(sum((self.data.close.values / last_day_memory.close.values) * weights))
            # update portfolio value
            new_portfolio_value = self.portfolio_value * (1 + portfolio_return)
            self.portfolio_value = new_portfolio_value

            # save into memory
            self.portfolio_return_memory.append(portfolio_return)
            self.date_memory.append(self.data["date"].unique()[0])
            self.asset_memory.append(new_portfolio_value)

            # the reward is the new portfolio value or end portfolo value
            self.reward = new_portfolio_value

        return self.state, self.reward, self.terminal, {}

    def render(self, mode='human'):
        print("---RENDER---")
        return self.state

    def save_asset_memory(self):
        print("---ASSET_MEMORY---")
        date_list = self.date_memory
        portfolio_return = self.portfolio_return_memory
        # print(len(date_list))
        # print(len(asset_list))
        df_account_value = pd.DataFrame({'date': date_list, 'daily_return': portfolio_return})
        return df_account_value

    def save_action_memory(self):
        print("---ACTION_MEMORY---")
        # date and close price length must match actions length
        date_list = self.date_memory
        df_date = pd.DataFrame(date_list)
        df_date.columns = ['date']

        action_list = self.actions_memory
        df_actions = pd.DataFrame(action_list)
        df_actions.columns = self.data.tic.values
        df_actions.index = df_date.date
        # df_actions = pd.DataFrame({'date':date_list,'actions':action_list})
        return df_actions

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def get_env(self):
        e = DummyVecEnv([lambda: self])
        obs = e.reset()
        return e, obs

In [7]:
class A2C(RLAgent):

    def __init__(self,
                 policy= "MlpPolicy",
                 env= None,
                 learning_rate: float = 7e-4,
                 n_steps: int = 5,
                 gamma: float = 0.99,
                 gae_lambda: float = 1.0,
                 ent_coef: float = 0.0,
                 vf_coef: float = 0.5,
                 max_grad_norm: float = 0.5,
                 rms_prop_eps: float = 1e-5,
                 use_rms_prop: bool = True,
                 use_sde: bool = False,
                 sde_sample_freq: int = -1,
                 normalize_advantage: bool = False,
                 tensorboard_log=None,
                 create_eval_env: bool = False,
                 policy_kwargs=None,
                 verbose: int = 0,
                 seed=None,
                 device="auto",
                 _init_setup_model: bool = True):

        self.env = env
        # self.model = A2C(model_params["policy"], model_params["environment"], model_params["verbose"])
        self.model = sb_A2C(policy = policy,
                            env=self.env,
                            learning_rate = learning_rate,
                            n_steps = n_steps,
                            gamma = gamma,
                            gae_lambda= gae_lambda,
                            ent_coef = ent_coef,
                            vf_coef = vf_coef,
                            max_grad_norm = max_grad_norm,
                            rms_prop_eps= rms_prop_eps,
                            use_rms_prop= use_rms_prop,
                            use_sde= use_sde,
                            sde_sample_freq= sde_sample_freq,
                            normalize_advantage= normalize_advantage,
                            tensorboard_log=tensorboard_log,  
                            create_eval_env= create_eval_env,
                            policy_kwargs=policy_kwargs,
                            verbose=verbose,
                            seed=seed,
                            device= device,
                            _init_setup_model = _init_setup_model)

    def train_model(self, **train_params):
        self.model = self.model.learn(**train_params)
        return self.model

    def predict(self, **test_params):

        test_env, test_obs = test_params["environment"].environment()
        """make a prediction"""
        account_memory = []
        actions_memory = []

        test_env.reset()
        for i in range(len(test_params["environment"].df.index.unique())):
            action, _states = self.model.predict(test_obs, deterministic=test_params["deterministic"])
            test_obs, rewards, dones, info = test_env.step(action)
            if i == (len(test_params["environment"].df.index.unique()) - 2):
                account_memory = test_env.env_method(method_name="save_asset_memory")
                actions_memory = test_env.env_method(method_name="save_action_memory")
            if dones[0]:
                print("hit end!")
                break

        return account_memory[0], actions_memory[0]

    def save_model(self, path):
        self.model.save(path)

    def load_model(self, path):
        self.model = self.model.load(path)
        return self.model

In [8]:
#Gather user parameters
with open("../user_params.yaml", "r") as stream:
    try:
        user_params = yaml.safe_load(stream)
    except yaml.YAMLError as exc:
        print(exc)

tickers = user_params["tickers"]
env_kwargs = user_params["env_params"]

In [9]:
print("\nTest 3: Downloading from Yahoo.........")
downloaded_df = DataDownloader.download_data(start_date='2009-01-01',
                                                end_date='2021-10-31',
                                                ticker_list=tickers)
print(downloaded_df.head(10))


Test 3: Downloading from Yahoo.........
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
Shape of DataFrame:  (12924, 8)
         date       open       high        low      close      volume   tic  \
0  2008-12-31   3.070357   3.133571   3.047857   2.606277   607541200  AAPL   
1  2008-12-31  41.590000  43.049999  41.500000  32.005886     5443100    BA   
2  2008-12-31  43.700001  45.099998  43.700001  30.628838     6277400   CAT   
3  2008-12-31  72.900002  74.629997  72.900002  43.670769     9964300   CVX   
4  2009-01-02   3.067143   3.251429   3.041429   2.771173   746015200  AAPL   
5  2009-01-02  42.799999  45.560001  42.779999  33.941097     7010200    BA   
6  2009-01-02  44.910000  46.980000  44.709999  32.164730     7117200   CAT   
7  2009-01-02  74.2

In [10]:
print("\nTest 4: Feature engineer.........")
df_processed = FeatureEngineer.add_features(df= downloaded_df,
                                            use_default= True,
                                            tech_indicator_list= env_kwargs["tech_indicator_list"],
                                            use_vix=True,
                                            use_turbulence=True,
                                            user_defined_feature=True)  # included technical indicators as features

print(df_processed.head())


Test 4: Feature engineer.........
Successfully added technical indicators
[*********************100%***********************]  1 of 1 completed
Shape of DataFrame:  (3231, 8)
Successfully added vix
Successfully added turbulence index
Successfully added user defined features
Successfully added covariances
        date   tic       open       high        low      close       volume  \
0 2009-12-31  AAPL   7.611786   7.619643   7.520000   6.434926  352410800.0   
1 2009-12-31    BA  55.000000  55.220001  54.049999  42.180119    2189400.0   
2 2009-12-31   CAT  57.599998  57.959999  56.990002  40.802940    3859700.0   
3 2009-12-31   CVX  77.720001  77.779999  76.930000  47.191082    4246600.0   
4 2010-01-04  AAPL   7.622500   7.660714   7.585000   6.535086  493729600.0   

   day      macd    boll_ub  ...     rsi_30      cci_30      dx_30  \
0  3.0  0.105229   6.531584  ...  60.410574  155.827604  31.312031   
1  3.0  0.448060  43.940993  ...  54.118705   17.483370   4.580979   
2  3.0  0

In [53]:
env = PortfolioEnv(df=df_processed, **env_kwargs) 
#env.step([[1/4] * 4])

             tic        open        high         low       close       volume  \
date                                                                            
2009-12-31  AAPL    7.611786    7.619643    7.520000    6.434926  352410800.0   
2009-12-31    BA   55.000000   55.220001   54.049999   42.180119    2189400.0   
2009-12-31   CAT   57.599998   57.959999   56.990002   40.802940    3859700.0   
2009-12-31   CVX   77.720001   77.779999   76.930000   47.191082    4246600.0   
2010-01-04  AAPL    7.622500    7.660714    7.585000    6.535086  493729600.0   
...          ...         ...         ...         ...         ...          ...   
2021-10-27   CVX  113.220001  113.580002  111.650002  109.638428    9096100.0   
2021-10-28  AAPL  149.820007  153.169998  149.720001  151.930588  100077900.0   
2021-10-28    BA  206.000000  208.740005  204.600006  207.850006    8825500.0   
2021-10-28   CAT  197.360001  204.500000  197.050003  202.135101    4462700.0   
2021-10-28   CVX  111.580002

KeyError: 0

In [45]:
env_train, _ = env.get_env()

----RESET---
(12, 4)
[[ 4.56627846e-04  4.56627846e-04  4.56627846e-04  4.56627846e-04]
 [ 2.58923298e-04  2.58923298e-04  2.58923298e-04  2.58923298e-04]
 [ 3.77467031e-04  3.77467031e-04  3.77467031e-04  3.77467031e-04]
 [ 1.97077776e-04  1.97077776e-04  1.97077776e-04  1.97077776e-04]
 [ 1.05228710e-01  4.48059866e-01  4.38584583e-02  9.45451765e-03]
 [ 6.53158431e+00  4.39409929e+01  4.23002432e+01  4.79791936e+01]
 [ 5.63348818e+00  4.17790000e+01  4.03502298e+01  4.69349998e+01]
 [ 6.04105745e+01  5.41187047e+01  5.18726017e+01  5.24399205e+01]
 [ 1.55827604e+02  1.74833702e+01 -7.43669000e+01 -6.98197586e+01]
 [ 3.13120307e+01  4.58097896e+00  6.79185412e+00  6.76338114e+00]
 [ 6.10564211e+00  4.22110231e+01  4.15260661e+01  4.76655006e+01]
 [ 6.04818295e+00  4.06901130e+01  4.10253094e+01  4.70486051e+01]]


In [34]:
env_train.action_space

Box([0. 0. 0. 0.], [1. 1. 1. 1.], (4,), float32)

In [35]:
train_params = user_params["train_params"]
policy_params = user_params["policy_params"]

In [36]:
train_params

{'A2C_PARAMS': {'total_timesteps': 1000,
  'callback': None,
  'log_interval': 100,
  'eval_env': None,
  'eval_freq': -1,
  'n_eval_episodes': 5,
  'tb_log_name': 'A2C',
  'eval_log_path': None,
  'reset_num_timesteps': True}}

In [37]:
#object creation
a2c = A2C(env = env_train, **policy_params["A2C_PARAMS"])

In [38]:
#training
a2c.train_model(**train_params["A2C_PARAMS"])

----RESET---
(12, 4)
[[ 4.56627846e-04  4.56627846e-04  4.56627846e-04  4.56627846e-04]
 [ 2.58923298e-04  2.58923298e-04  2.58923298e-04  2.58923298e-04]
 [ 3.77467031e-04  3.77467031e-04  3.77467031e-04  3.77467031e-04]
 [ 1.97077776e-04  1.97077776e-04  1.97077776e-04  1.97077776e-04]
 [ 1.05228710e-01  4.48059866e-01  4.38584583e-02  9.45451765e-03]
 [ 6.53158431e+00  4.39409929e+01  4.23002432e+01  4.79791936e+01]
 [ 5.63348818e+00  4.17790000e+01  4.03502298e+01  4.69349998e+01]
 [ 6.04105745e+01  5.41187047e+01  5.18726017e+01  5.24399205e+01]
 [ 1.55827604e+02  1.74833702e+01 -7.43669000e+01 -6.98197586e+01]
 [ 3.13120307e+01  4.58097896e+00  6.79185412e+00  6.76338114e+00]
 [ 6.10564211e+00  4.22110231e+01  4.15260661e+01  4.76655006e+01]
 [ 6.04818295e+00  4.06901130e+01  4.10253094e+01  4.70486051e+01]]
----STEP---


AttributeError: 'numpy.float64' object has no attribute 'values'

In [None]:
#predicting
a2c.predict(test_params)

In [None]:
#saving 
a2c.save_model("a2c_model")

In [None]:
#loading
loaded_a2c_model = a2c.load_model("a2c_model")