In [2]:
"""
Created on Wed Jun 28 04:57:40 2023
@author: hugo511
"""

import argparse
import pandas as pd
# pd.set_option('display.float_format', lambda x:'%.2f'%x)
import numpy as np
np.set_printoptions(suppress=True)
import time
import warnings
warnings.filterwarnings('ignore')
import os 
import statsmodels.api as sm

def _str2bool(value):
    """Convert a command-line token to a real boolean.

    ``type=bool`` is not usable with argparse: ``bool("False")`` is ``True``
    because every non-empty string is truthy. This converter maps the usual
    spellings explicitly and rejects anything else.

    Raises:
        argparse.ArgumentTypeError: if ``value`` is not a recognised spelling.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ('true', 't', 'yes', 'y', '1'):
        return True
    if text in ('false', 'f', 'no', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError(f'expected a boolean value, got {value!r}')


# Run configuration. The defaults below are overridden further down in the
# notebook by direct assignment to attributes of `args`.
parser = argparse.ArgumentParser(description="Asset Pricing - Tail stocks return regression")
parser.add_argument('--dataset_start', type=int, default=196301, help='dataset start date')
parser.add_argument('--dataset_end', type=int, default=201012, help='dataset end date')
parser.add_argument('--qt', type=float, default=0.05, help='quantile value - u_t')
parser.add_argument('--save_dir', type=str, default='../testTailStocksRET_table3/', help='results save dirs')
parser.add_argument('--isTest', type=_str2bool, default=True, help='is Test or not')
parser.add_argument('--Lwindow', type=int, default=1, help='length of rolling window')
# NOTE(review): presumably absorbs the `-f <connection file>` argument that
# Jupyter passes to the kernel process - confirm before removing.
parser.add_argument('-f')
# parse_known_args() ignores any further launcher-specific arguments instead
# of terminating the kernel with SystemExit; `args` keeps the same attributes.
args, _unknown_args = parser.parse_known_args()

def cal_aggHorizon_ret(Yret_init, H):
    """Compound simple returns over forward-looking windows of length ``H``.

    Element ``j`` of the result is ``prod(1 + Yret_init[j+1 : j+1+H]) - 1``,
    i.e. the window starts one observation *after* position ``j``.

    Args:
        Yret_init: 1-D sequence of simple returns (numpy array or pandas
            Series). Windows are taken with plain ``[start:stop]`` slices of
            this object, so its own ``prod`` semantics apply (a pandas Series
            skips NaN, a numpy array propagates it).
        H: window length in observations.

    Returns:
        1-D float ``numpy.ndarray`` with ``max(len(Yret_init) - H, 0)``
        elements.
    """
    n_obs = Yret_init.shape[0]
    # Collect in a list and convert once: calling np.append inside the loop
    # copies the whole array on every iteration.
    compounded = [
        (Yret_init[start: start + H] + 1).prod() - 1
        for start in range(1, n_obs - H + 1)
    ]
    # dtype=float keeps the result float64 even when no window fits.
    return np.array(compounded, dtype=float)


if __name__ == '__main__':
    # Run settings: these assignments replace whatever argparse produced.
    args.dataset_start, args.dataset_end = 196301, 202212
    args.isTest = False
    args.tail_dir = './logs/Hill_estimate_results/'

    # Tail estimates (column `lambda_t`), restricted to the sample window.
    tail_estimates = pd.read_csv(
        os.path.join(args.tail_dir, f'Hillestimate_plotresults_{args.dataset_start}_{args.dataset_end}.csv')
    )
    tail_estimates = tail_estimates[
        tail_estimates['date'].between(args.dataset_start, args.dataset_end)
    ]

    # Monthly market return (column `vwretd`), same window, index restarted at 0.
    mkt_monthlyret = pd.read_csv('./Dataset/MarketRET_Monthly_192601_202212.csv')
    mkt_monthlyret = mkt_monthlyret.loc[
        mkt_monthlyret['date'].between(args.dataset_start, args.dataset_end)
    ].reset_index(drop=True)

    # Equity-premium predictors, same window, index restarted at 0.
    equity_predictor = pd.read_csv('./Dataset/EquityPredictorDataset_Monthly_192601_202212.csv')
    equity_predictor = equity_predictor.loc[
        equity_predictor['date'].between(args.dataset_start, args.dataset_end)
    ].reset_index(drop=True)

    # One row per market-return month; tail estimate and predictors are
    # left-joined on `date`. The last two columns of the predictor file are
    # dropped before the join (their content is not visible from this cell).
    fitdataset = (
        mkt_monthlyret[['date', 'vwretd']]
        .merge(tail_estimates[['date', 'lambda_t']], on='date', how='left')
        .merge(equity_predictor.iloc[:, :-2], on='date', how='left')
        .rename(columns={'lambda_t': 'Tail'})
    )

    columns = ['Tail', 'Book-to-market', 'Default return spread', 'Default yield spread',
               'Dividend payout ratio', 'Dividend price ratio', 'Earnings price ratio',
               'Inflation', 'Long-term return', 'Long-term yield',
               'Net equity expansion', 'Stock volatility', 'Term spread', 'Treasury-bill rate']
    # Z-score every predictor with its own sample mean and standard deviation.
    fitdataset[columns] = fitdataset[columns].apply(
        lambda series: (series - series.mean()) / series.std()
    )

In [3]:
#%% table 3
# Out-of-sample (OOS) R^2, in percent, of univariate predictive regressions of
# the H-month compounded market return on each regressor, measured against an
# expanding-window historical-mean forecast. Horizons: 1, 12, 36 and 60 months.

# Row of the last in-sample month; OOS forecasts start at the following row.
Tidx = fitdataset[fitdataset['date'] == 197212].index.item()
regressors = ['Tail', 'Book-to-market', 'Default return spread', 'Default yield spread', 'Dividend payout ratio', 'Dividend price ratio'
              , 'Earnings price ratio', 'Inflation', 'Long-term return', 'Long-term yield', 'Net equity expansion', 'Stock volatility'
              , 'Term spread', 'Treasury-bill rate']
# Column label -> horizon length in months.
HORIZONS = {'1M': 1, '1Y': 12, '3Y': 36, '5Y': 60}


def _build_horizon_dataset(data, predictors, horizon):
    """Pair each row's predictors with the return compounded over the next ``horizon`` rows.

    The last ``horizon`` rows of ``data`` are dropped because their forward
    window is incomplete. Returns a frame with columns ``date``, ``vwretd``
    (the compounded forward return) and one column per predictor.
    """
    dataset = pd.DataFrame()
    dataset['date'] = data['date'][:-horizon]
    dataset['vwretd'] = cal_aggHorizon_ret(data['vwretd'], H=horizon)
    for col in predictors:
        dataset[col] = data[col][:-horizon]
    return dataset


def _oos_forecasts(dataset, predictors, first_oos, horizon, label):
    """Expanding-window OOS forecasts and OOS R^2 for one horizon.

    For every row ``idx >= first_oos`` the benchmark mean and each univariate
    OLS (with intercept) are estimated on rows ``0 .. idx - 1 - horizon``
    (``.loc`` slices are label-inclusive) and evaluated at row ``idx``.

    Returns:
        (forecasts, rsquared) where ``forecasts`` holds ``date``, the realised
        ``vwretd``, the benchmark ``vwretdt_mean`` and one forecast column per
        predictor, and ``rsquared`` has columns ``'Rz.'`` and ``label`` with
        the OOS R^2 in percent rounded to one decimal.
    """
    forecasts = dataset.loc[first_oos:, ['date', 'vwretd']].copy()
    oos_rows = range(first_oos, dataset.shape[0])

    # NOTE(review): the estimation sample stops at idx - 1 - horizon, one row
    # earlier than idx - horizon. Kept as in the original so the reported
    # numbers do not change - confirm the extra lag is intentional.
    forecasts['vwretdt_mean'] = [
        dataset.loc[:idx - 1 - horizon, 'vwretd'].mean() for idx in oos_rows
    ]

    records = []
    for regressor in predictors:
        predictions = []
        for idx in oos_rows:
            train_end = idx - 1 - horizon
            fitY = dataset.loc[:train_end, 'vwretd']
            fitX = sm.add_constant(dataset.loc[:train_end, regressor])
            rgmodel = sm.OLS(fitY, fitX).fit()
            Xpred = np.array([1, dataset.loc[idx, regressor]])
            predictions.append(rgmodel.params @ Xpred)
        # One column assignment instead of growing the frame cell by cell.
        forecasts[regressor] = predictions

        sse_model = ((forecasts['vwretd'] - forecasts[regressor]) ** 2).sum()
        sse_benchmark = ((forecasts['vwretd'] - forecasts['vwretdt_mean']) ** 2).sum()
        rsquared = np.round((1 - sse_model / sse_benchmark) * 100, 1)
        records.append({'Rz.': regressor, label: rsquared})
        print(f'-- finish {regressor} {label}. --')

    # DataFrame.append was removed in pandas 2.0; build the table in one go.
    return forecasts, pd.DataFrame(records, columns=['Rz.', label])


OOS_Yresult = {}
OOS_Rsquared_tables = {}
horizon_datasets = {}
for label, horizon in HORIZONS.items():
    args.Lwindow = horizon
    horizon_datasets[label] = _build_horizon_dataset(fitdataset, regressors, horizon)
    OOS_Yresult[label], OOS_Rsquared_tables[label] = _oos_forecasts(
        horizon_datasets[label], regressors, Tidx + 1, horizon, label
    )
    print(OOS_Rsquared_tables[label])

# Per-horizon names kept so later cells that refer to them continue to work.
fitdataset1M, fitdataset1Y, fitdataset3Y, fitdataset5Y = (horizon_datasets[key] for key in HORIZONS)
OOS_Yresult1M, OOS_Yresult1Y, OOS_Yresult3Y, OOS_Yresult5Y = (OOS_Yresult[key] for key in HORIZONS)
OOS_Rsquared1M, OOS_Rsquared1Y, OOS_Rsquared3Y, OOS_Rsquared5Y = (OOS_Rsquared_tables[key] for key in HORIZONS)

# Combined table: regressor names plus one R^2 column per horizon.
OOS_Rsquared = pd.concat([OOS_Rsquared1M, OOS_Rsquared1Y.iloc[:,1], OOS_Rsquared3Y.iloc[:,1], OOS_Rsquared5Y.iloc[:,1]], axis=1)
print(OOS_Rsquared)
# save
args.save_dir = './logs/OOSPred/'
os.makedirs(args.save_dir, exist_ok=True)
OOS_Rsquared.to_csv(os.path.join(args.save_dir, f'OOSPred_{args.dataset_start}_{args.dataset_end}.csv'), index=False)

-- finish Tail 1M. --
-- finish Book-to-market 1M. --
-- finish Default return spread 1M. --
-- finish Default yield spread 1M. --
-- finish Dividend payout ratio 1M. --
-- finish Dividend price ratio 1M. --
-- finish Earnings price ratio 1M. --
-- finish Inflation 1M. --
-- finish Long-term return 1M. --
-- finish Long-term yield 1M. --
-- finish Net equity expansion 1M. --
-- finish Stock volatility 1M. --
-- finish Term spread 1M. --
-- finish Treasury-bill rate 1M. --
                      Rz.   1M
0                    Tail  0.1
1          Book-to-market -1.0
2   Default return spread -1.0
3    Default yield spread -0.3
4   Dividend payout ratio -1.7
5    Dividend price ratio -0.8
6    Earnings price ratio -1.7
7               Inflation -0.8
8        Long-term return  0.2
9         Long-term yield -1.0
10   Net equity expansion -0.9
11       Stock volatility -5.9
12            Term spread -0.5
13     Treasury-bill rate -1.1
-- finish Tail 1Y. --
-- finish Book-to-market 1Y. --
-- f

In [4]:
# Show the combined OOS R^2 table again: one row per regressor ('Rz.'), one
# column per horizon (1M, 1Y, 3Y, 5Y), values in percent rounded to 1 decimal.
print(OOS_Rsquared)

                      Rz.   1M    1Y    3Y    5Y
0                    Tail  0.1   2.2  17.9  23.4
1          Book-to-market -1.0  -7.3 -19.6 -40.9
2   Default return spread -1.0  -0.4  -0.2  -0.0
3    Default yield spread -0.3   2.7 -12.7  -1.5
4   Dividend payout ratio -1.7 -22.2 -17.7 -64.6
5    Dividend price ratio -0.8  -8.7 -10.8  -4.6
6    Earnings price ratio -1.7 -14.7  -2.6 -20.7
7               Inflation -0.8  -2.7  -3.3 -12.7
8        Long-term return  0.2   1.1   0.3   1.3
9         Long-term yield -1.0  -8.8  -2.1 -31.6
10   Net equity expansion -0.9  -9.4  -6.0 -12.4
11       Stock volatility -5.9 -27.6 -32.5 -32.3
12            Term spread -0.5  -2.1  -8.3  11.1
13     Treasury-bill rate -1.1  -9.2  -0.8 -29.1
