In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import gym
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from portfolio_env_framework import *
import random
import torch
import numpy.typing as npt

In [20]:
class BasicDataManager(AbstractDataManager):
    def get_obs_space(self) -> gym.spaces.Box:
        return gym.spaces.Box(low=-np.inf, high=np.inf, shape=(self.universe_size+1, 100+2), dtype=np.float32)

    def get_data(self) -> tuple[int, int]:
        # read SNP data
        df = pd.read_csv('crsp_full_2010_2024.csv', dtype='string')
    
        # convert datatypes
        df = df[['date', 'TICKER', 'PRC', 'VOL', 'ASKHI', 'BIDLO', 'FACPR', 'vwretd']]
        df.date = pd.to_datetime(df.date)
        df.FACPR = df.FACPR.fillna('0.0')
        df.astype({
            'PRC': float,
            'VOL': float,
            'ASKHI': float,
            'BIDLO': float,
            'FACPR': float,
            'vwretd': float
        })
    
        # drop duplicates and nans
        df = df.drop_duplicates(subset=['date', 'TICKER'])
        df.dropna(inplace=True)
    
        # only include stocks that are present in all dates
        ticker_ok = df.TICKER.value_counts() == df.TICKER.value_counts().max()
        def is_max_val_count(ticker: str) -> bool:
          return ticker_ok[ticker]
        ok = df.apply(lambda row: is_max_val_count(row['TICKER']), axis=1)
        df = df[ok]
        df = df[(df.date.dt.year >= 2010) & (df.date.dt.year < 2015)]
    
        # create stock array
        self.stock_df = df.pivot(index='date', columns='TICKER', values='PRC').astype(float)
        
        # adjust for stock splits
        facpr_df = df.pivot(index='date', columns='TICKER', values='FACPR').astype(float)
        self.stock_df = self.stock_df * (1+facpr_df).cumprod(axis=0)
        # assert np.all(self.stock_df.pct_change().iloc[1:, :] > -1), f"{(self.stock_df.pct_change().iloc[1:, :] <= -1).sum().sum()=}, {np.any(pd.isna(self.stock_df.pct_change().iloc[1:, :]))}"
        self.ret = np.log(self.stock_df.pct_change().iloc[1:, :] + 1)
    
        # get times and dickers
        self.times = df.date.unique()[1:]
        self.tickers = df.TICKER.unique()
    
        # read index data and compute volatilities
        idx_df = df[['date', 'TICKER', 'vwretd', 'PRC']].copy()
        idx_df.date = pd.to_datetime(idx_df.date)
        idx_df['PRC'] = idx_df['PRC'].astype(float)
        idx_df['vwretd'] = idx_df['PRC'].astype(float)
        idx_df['weights'] = idx_df.groupby('date')['PRC'].transform(lambda x: x / x.sum())
        idx_df['pf_returns'] = (idx_df['weights'] * idx_df['vwretd']) + 1
        idx_df['daily_ret'] = idx_df.groupby('date')['pf_returns'].transform(np.mean)
        idx_df = idx_df.drop_duplicates(subset = ['date'])
        idx_df['vol_20'] = idx_df.daily_ret.rolling(20).std()
        idx_df['vol_60'] = idx_df.daily_ret.rolling(60).std()
        idx_df.set_index('date', inplace=True)
        self.vol_20 = idx_df.vol_20
        self.vol_60 = idx_df.vol_60
        self.idx_df = idx_df
    
        # get vix data
        vix_df = pd.read_csv('crsp_vix_2010_to_2024.csv', dtype={
          'Date': 'string',
          'vix': float
        })
        vix_df.Date = pd.to_datetime(vix_df.Date)
        vix_df.set_index('Date', inplace=True)
        self.vix_df = vix_df.vix
        
        self.num_time_periods = len(self.times)-100-1
        self.universe_size = len(self.tickers)

        #get news sentiment data
        means_df = pd.read_csv('./news_sentiment_data.csv')
        means_df['Date'] = pd.to_datetime(means_df['Date'], errors='coerce')
        means_df = means_df[(means_df.Date.dt.year >= 2010) & (means_df.Date.dt.year < 2011)]
        means_df = means_df.loc[means_df['Ticker'].isin(self.tickers)]
        news_sentiment_array = means_df.pivot(index="Date", columns="Ticker", values="sentiment_embedding")
        news_sentiment_array = news_sentiment_array.resample('D').asfreq()
        no_sentiment_tickers = list(set(self.tickers) - set(news_sentiment_array.columns.values))
        news_sentiment_array[[no_sentiment_tickers]] = np.nan
        self.news_sentiment_array = news_sentiment_array.fillna(0)

        return self.num_time_periods, self.universe_size
    
    def get_state(self, t: int, w: npt.NDArray[np.float64], port_val: np.float64) -> npt.NDArray[np.float64]:
        # today is self.times[self.t+100]
        s = np.zeros((self.universe_size+1, 100+2))
        s[:, 0] = w
        # s[1:, :-1] = self.ret[self.t:self.t+100, :].T
        s[1:, :-2] = self.ret.loc[self.times[t:t+100], :].to_numpy().T # 100 past returns, up to yesterday
        # s[1:, -1] = self.news_sentiment_array.loc[self.times[t+100-1]].values # yesterday's news
        s[1:, -1] = np.zeros(self.universe_size)
        s[-1, 1] = self.vol_20[self.times[t+100-1]] # yesterday's vol_20
        s[-1, 2] = self.vol_20[self.times[t+100-1]] / self.vol_60[self.times[t+100-1]] # yesterday's vol ratio
        s[-1, 3] = self.vix_df[self.times[t+100-1]] # yesterday's vix
        return s

    def get_prices(self, t: int) -> npt.NDArray[np.float64]:
        # today is self.times[self.t+100]
        return np.append(self.stock_df.loc[self.times[t+100], :].to_numpy().flatten(), 1.0)

In [18]:
class DifferentialSharpeRatioReward(AbstractRewardManager):
    def __init__(self, eta: float = 1/252):
        self.eta = eta
        self.initialize_reward()

    def initialize_reward(self):
        self.A, self.B = 0.0, 0.0

    def compute_reward(self, old_port_val: float, new_port_val: float) -> float:
        R = np.log(new_port_val / old_port_val)
        dA = R - self.A
        dB = R ** 2 - self.B
        if self.B - self.A ** 2 == 0:
            D = 0
        else:
            D = (self.B * dA - 0.5 * self.A * dB) / (self.B - self.A ** 2) ** (3 / 2)
        self.A += self.eta * dA
        self.B += self.eta * dB
        return D

In [21]:
# Parallel environments
vec_env = make_vec_env(PortfolioEnvWithTCost, n_envs=4, env_kwargs={
    'dm': BasicDataManager(),
    'rm': DifferentialSharpeRatioReward(),
    'cp': 0.10/365,
    'cs': 0.10/365
})

# Set seeds
random.seed(42)
np.random.seed(42)
vec_env.seed(42)
vec_env.action_space.seed(43)
torch.manual_seed(42)

model = PPO("MlpPolicy", vec_env, verbose=1)
model.learn(total_timesteps=30_000)
model.save("tcost_portfolio_rl_model_framework_full_news")

  result = func(self.values, **kwargs)
  idx_df['daily_ret'] = idx_df.groupby('date')['pf_returns'].transform(np.mean)
  news_sentiment_array[[no_sentiment_tickers]] = np.nan
  news_sentiment_array[[no_sentiment_tickers]] = np.nan
  news_sentiment_array[[no_sentiment_tickers]] = np.nan
  news_sentiment_array[[no_sentiment_tickers]] = np.nan
  news_sentiment_array[[no_sentiment_tickers]] = np.nan
  news_sentiment_array[[no_sentiment_tickers]] = np.nan
  news_sentiment_array[[no_sentiment_tickers]] = np.nan
  news_sentiment_array[[no_sentiment_tickers]] = np.nan
  news_sentiment_array[[no_sentiment_tickers]] = np.nan
  news_sentiment_array[[no_sentiment_tickers]] = np.nan
  news_sentiment_array[[no_sentiment_tickers]] = np.nan
  news_sentiment_array[[no_sentiment_tickers]] = np.nan
  news_sentiment_array[[no_sentiment_tickers]] = np.nan
  news_sentiment_array[[no_sentiment_tickers]] = np.nan
  news_sentiment_array[[no_sentiment_tickers]] = np.nan
  news_sentiment_array[[no_sentiment_tick

In [None]:
env = PortfolioEnvWithTCost(dm=BasicDataManager(), rm=DifferentialSharpeRatioReward(), cp=0.10/365, cs=0.10/365)
obs, _ = env.reset()

port_val = [1.0]
snp_val = [1.0]
actions = []
ys = []
times_arr = [env.dm.times[env.t+100]]
mus = []

for i in range(900):
    action, _ = model.predict(obs, deterministic=True)
    actions.append(action)
    obs, rewards, terminated, truncated, info = env.step(action)
    ys.append(env.y)
    mus.append(env.mu)
    times_arr.append(env.dm.times[env.t+100])
    port_val.append(info['port_val'])
    snp_val.append(snp_val[-1] * (1 + env.dm.idx_df[env.dm.idx_df.index == env.dm.times[env.t+100]].vwretd.values[0]))
    if terminated:
        break

In [2]:
df = pd.read_csv('crsp_full_2010_2024.csv', dtype='string')
    
# convert datatypes
df = df[['date', 'TICKER', 'PRC', 'VOL', 'ASKHI', 'BIDLO', 'FACPR', 'vwretd']]
df.date = pd.to_datetime(df.date)
df.FACPR = df.FACPR.fillna('0.0')
df.astype({
    'PRC': float,
    'VOL': float,
    'ASKHI': float,
    'BIDLO': float,
    'FACPR': float,
    'vwretd': float
})

# drop duplicates and nans
df = df.drop_duplicates(subset=['date', 'TICKER'])
df.dropna(inplace=True)

# only include stocks that are present in all dates
ticker_ok = df.TICKER.value_counts() == df.TICKER.value_counts().max()
def is_max_val_count(ticker: str) -> bool:
  return ticker_ok[ticker]
ok = df.apply(lambda row: is_max_val_count(row['TICKER']), axis=1)
df = df[ok]
df = df[(df.date.dt.year >= 2010) & (df.date.dt.year < 2015)]

In [5]:
idx_df = df[['date', 'TICKER', 'vwretd', 'PRC']].copy()
idx_df.date = pd.to_datetime(idx_df.date)
idx_df['PRC'] = idx_df['PRC'].astype(float)
idx_df['weights'] = idx_df.groupby('date')['PRC'].transform(lambda x: x / x.sum())

In [7]:
idx_df['vwretd']

3688         0.017123
3689         0.003374
3690         0.002046
3691         0.002860
3692         0.004173
              ...    
16215088     0.000579
16215089     0.003668
16215090     0.001242
16215091    -0.004285
16215092    -0.008784
Name: vwretd, Length: 2733634, dtype: string

In [11]:
idx_df['vwretd'] = idx_df['PRC'].astype(float)
idx_df['weights'] = idx_df.groupby('date')['PRC'].transform(lambda x: x / x.sum())
idx_df['pf_returns'] = (idx_df['weights'] * idx_df['vwretd']) + 1
idx_df['daily_ret'] = idx_df.groupby('date')['pf_returns'].transform(np.mean)

  idx_df['daily_ret'] = idx_df.groupby('date')['pf_returns'].transform(np.mean)


In [12]:
idx_df['pf_returns']

3688        1.025708
3689        1.023776
3690        1.021755
3691        1.022735
3692        1.022899
              ...   
16215088    1.011791
16215089    1.011568
16215090    1.011584
16215091    1.011593
16215092    1.011408
Name: pf_returns, Length: 2733634, dtype: float64

In [13]:
idx_df['daily_ret']

3688        1.043697
3689        1.043490
3690        1.044612
3691        1.042709
3692        1.042787
              ...   
16215088    1.125001
16215089    1.125538
16215090    1.127762
16215091    1.128675
16215092    1.131375
Name: daily_ret, Length: 2733634, dtype: float64

In [14]:
idx_df['vol_20'] = idx_df.daily_ret.rolling(20).std()
idx_df['vol_60'] = idx_df.daily_ret.rolling(60).std()

In [16]:
idx_df['vol_60']

3688             NaN
3689             NaN
3690             NaN
3691             NaN
3692             NaN
              ...   
16215088    0.014115
16215089    0.014131
16215090    0.014192
16215091    0.014257
16215092    0.014372
Name: vol_60, Length: 2733634, dtype: float64