In [3]:
"""
Created on Tue Jun 27 08:25:15 2023

@author: hugo511
"""

import argparse
import pandas as pd
# pd.set_option('display.float_format', lambda x:'%.2f'%x)
import numpy as np
np.set_printoptions(suppress=True)
import time
import warnings
warnings.filterwarnings('ignore')
import os 
import statsmodels.api as sm
import sys
sys.path.append('../')

parser = argparse.ArgumentParser(description="Asset Pricing - Tail stocks return regression")
parser.add_argument('--dataset_start', type=int, default=196301, help='dataset start date')
parser.add_argument('--dataset_end', type=int, default=201012, help='dataset end date')
parser.add_argument('--qt', type=float, default=0.05, help='quantile value - u_t')
parser.add_argument('--save_dir', type=str, default='./testTailStocksRET_table1/', help='results save dirs')
parser.add_argument('--isTest', type=bool, default=True, help='is Test or not')
parser.add_argument('--Lwindow', type=int, default=1, help='length of rolling window')
parser.add_argument('-f')
args = parser.parse_args()

def cal_aggHorizon_ret(Yret_init, H):
    Yagg = np.array([])
    for idx in range(1, Yret_init.shape[0] - H + 1):
        Yagg_idx = (Yret_init[idx: idx + H] + 1).prod() - 1
        Yagg = np.append(Yagg, Yagg_idx)
    return Yagg

class computeHodrick1992VCovMatrix:
    def __init__(self, Ysub_H, Ysub_1, X, H):
        self.Ysub_H = Ysub_H
        self.Ysub_1 = Ysub_1
        self.X = X
        self.H = H 
        self.nrows, self.ncols = X.shape
        C = np.ones([Ysub_1.shape[0], 1])
        rg_sub_1 = sm.OLS(Ysub_1, C).fit()
        self.resid_sub_1 = rg_sub_1.resid
        
    def computeZ(self):
        Z = np.zeros([self.ncols, self.ncols])
        for i in range(self.nrows):
            Xi = np.expand_dims(self.X[i,], axis=1)
            Z = Z + Xi @ Xi.T
        Z = (1/self.nrows) * Z
        return Z
    
    def computelittleM(self, i, H):
        m = np.zeros([self.ncols, 1])
        for h in range(H): # h = 0:H-1
            Xi = np.expand_dims(self.X[i-h], axis=1)
            m = m + Xi
        m = self.resid_sub_1[i] * m
        return m
    
    def computeS(self, H):
        S = np.zeros([self.ncols, self.ncols])
        for i in range(H-1, self.nrows):
            S = S + (self.computelittleM(i, H) @ (self.computelittleM(i, H)).T)
        S = (1/self.nrows) * S
        return S
    
    def forward(self, H):
        Ssub_H = self.computeS(H)
        Zsub_H = self.computeZ()
        Vsub_H = np.linalg.inv(Zsub_H) @ Ssub_H @ np.linalg.inv(Zsub_H) / self.nrows
        return Vsub_H


if __name__ == '__main__':
    # dataset
    args.dataset_start = 196301
    args.dataset_end = 202212
    args.isTest = False
    args.tail_dir = './logs/Hill_estimate_results/'
    # tail lambda_t
    tail_estimates = pd.read_csv(os.path.join(args.tail_dir, f'Hillestimate_plotresults_{args.dataset_start}_{args.dataset_end}.csv'))
    tail_estimates = tail_estimates[(tail_estimates['date'] >= args.dataset_start) & (tail_estimates['date'] <= args.dataset_end)]
    # tail_estimates.columns
    
    # market return vwretd monthly
    mkt_monthlyret = pd.read_csv('./Dataset/MarketRET_Monthly_192601_202212.csv')
    mkt_monthlyret = mkt_monthlyret[(mkt_monthlyret['date'] >= args.dataset_start) & (mkt_monthlyret['date'] <= args.dataset_end)].reset_index(drop=True)
    
    # equity_predictor 
    equity_predictor = pd.read_csv('./Dataset/EquityPredictorDataset_Monthly_192601_202212.csv')
    equity_predictor = equity_predictor[(equity_predictor['date'] >= args.dataset_start) & (equity_predictor['date'] <= args.dataset_end)].reset_index(drop=True)
    
    # FF riskfree rate 
    FF_dataset = pd.read_csv('./Dataset/FF3factors_Momentum_Monthly_192601_202212.csv')
    FF_dataset['dateff'] = FF_dataset['dateff'].apply(lambda x: int(str(x)[:6]))
    FF_dataset.rename(columns={'dateff':'date'}, inplace=True)
    rf_dataset = FF_dataset[(FF_dataset['date'] >= args.dataset_start) & (FF_dataset['date'] <= args.dataset_end)][['date', 'rf']].reset_index(drop=True)
    
    # fitting dataset
    fitdataset = pd.concat([equity_predictor['date'], mkt_monthlyret['vwretd'], tail_estimates['lambda_t'], equity_predictor.iloc[:, 1:-2]], axis=1)
    fitdataset = fitdataset.rename(columns={'lambda_t':'Tail'})
    # fitdataset.columns
    for col in fitdataset.columns[2:]:
        fitdataset[col] = (fitdataset[col] - fitdataset[col].mean()) / fitdataset[col].std()

In [4]:
#%% table 1 
regressors = ['Tail', 'Book-to-market', 'Default return spread', 'Default yield spread', 'Dividend payout ratio', 'Dividend price ratio'
              , 'Earnings price ratio', 'Inflation', 'Long-term return', 'Long-term yield', 'Net equity expansion', 'Stock volatility'
              , 'Term spread', 'Treasury-bill rate']
# 1M 
args.Lwindow = 1
fitdataset1M = pd.DataFrame()
fitdataset1M['date'] = fitdataset['date'][:-args.Lwindow]
fitdataset1M['vwretd'] = fitdataset['vwretd'].shift(-1).dropna()
# fitdataset1M = fitdataset1M.merge(rf_dataset, how='left', on='date')
for col in regressors:
    fitdataset1M[col] = fitdataset[col][:-args.Lwindow]
TailStocksRET1M_df = pd.DataFrame(columns=['Rz.', 'Coeff.', 't-stat.', 'R2'])
for regressor in regressors:
    fitY = fitdataset1M['vwretd'] #- fitdataset1M['rf']
    fitX = fitdataset1M[regressor]
    fitX = sm.add_constant(fitX)
    rgmodel = sm.OLS(fitY, fitX).fit()
    coeff = np.round(rgmodel.params[f'{regressor}'] * 12 * 100, 2)
    t_stat = np.round(rgmodel.tvalues[f'{regressor}'], 2)
    rsquared = np.round(rgmodel.rsquared * 100, 1)
    # Hodrick standard error
    Ysub_H = np.array(fitY); Ysub_1 = np.array(fitY); X = np.array(fitX); H = args.Lwindow
    HodrickV = computeHodrick1992VCovMatrix(Ysub_H, Ysub_1, X, H)
    t_Hodrick1M = np.round(rgmodel.params[f'{regressor}'] / np.sqrt(HodrickV.forward(H)[1, 1]), 2)       
    TailStocksRET1M_df = TailStocksRET1M_df.append({'Rz.':f'{regressor}', 'Coeff.':coeff, 't-stat.':t_stat, 'R2':rsquared},ignore_index=True)
print(TailStocksRET1M_df)

# 1Y 
args.Lwindow = 12
fitdataset1Y = pd.DataFrame()
fitdataset1Y['date'] = fitdataset['date'][:-args.Lwindow]   
fitdataset1Y['vwretd'] = cal_aggHorizon_ret(fitdataset['vwretd'], args.Lwindow)
# fitdataset1Y = fitdataset1Y.merge(rf_dataset, how='left', on='date')
for col in regressors:
    fitdataset1Y[col] = fitdataset[col][:-args.Lwindow]
TailStocksRET1Y_df = pd.DataFrame(columns=['Rz.', 'Coeff.', 't-stat.', 'R2'])
for regressor in regressors:
    fitY = fitdataset1Y['vwretd'] #- fitdataset1Y['rf']
    fitX = fitdataset1Y[regressor]
    fitX = sm.add_constant(fitX)
    rgmodel = sm.OLS(fitY, fitX).fit()
    coeff = np.round(rgmodel.params[f'{regressor}'] * 100, 2)
    t_stat = np.round(rgmodel.tvalues[f'{regressor}'], 2)
    rsquared = np.round(rgmodel.rsquared * 100, 1)
    # Hodrick standard error
    Ysub_H = np.array(fitY); Ysub_1 = np.array(fitdataset1M['vwretd']); X = np.array(fitX); H = args.Lwindow - 1
    HodrickV = computeHodrick1992VCovMatrix(Ysub_H, Ysub_1, X, H)
    t_Hodrick1Y = np.round(rgmodel.params[f'{regressor}'] / np.sqrt(HodrickV.forward(H)[1, 1]), 2)
    TailStocksRET1Y_df = TailStocksRET1Y_df.append({'Rz.':f'{regressor}', 'Coeff.':coeff, 't-stat.':t_Hodrick1Y, 'R2':rsquared},ignore_index=True)
print(TailStocksRET1Y_df)

# 3Y 
args.Lwindow = 36
fitdataset3Y = pd.DataFrame()
fitdataset3Y['date'] = fitdataset['date'][:-args.Lwindow]   
fitdataset3Y['vwretd'] = cal_aggHorizon_ret(fitdataset['vwretd'], args.Lwindow)
# fitdataset3Y = fitdataset3Y.merge(rf_dataset, how='left', on='date')
for col in regressors:
    fitdataset3Y[col] = fitdataset[col][:-args.Lwindow]
TailStocksRET3Y_df = pd.DataFrame(columns=['Rz.', 'Coeff.', 't-stat.', 'R2'])
for regressor in regressors:
    fitY = fitdataset3Y['vwretd'] #- fitdataset3Y['rf']
    fitX = fitdataset3Y[regressor]
    fitX = sm.add_constant(fitX)
    rgmodel = sm.OLS(fitY, fitX).fit()
    coeff = np.round(rgmodel.params[f'{regressor}'] / 3 * 100, 2)
    t_stat = np.round(rgmodel.tvalues[f'{regressor}'], 2)
    rsquared = np.round(rgmodel.rsquared * 100, 1)
    # Hodrick standard error
    Ysub_H = np.array(fitY); Ysub_1 = np.array(fitdataset1M['vwretd']); X = np.array(fitX); H = args.Lwindow
    HodrickV = computeHodrick1992VCovMatrix(Ysub_H, Ysub_1, X, H)
    t_Hodrick3Y = np.round(rgmodel.params[f'{regressor}'] / np.sqrt(HodrickV.forward(H)[1, 1]), 2)
    TailStocksRET3Y_df = TailStocksRET3Y_df.append({'Rz.':f'{regressor}', 'Coeff.':coeff, 't-stat.':t_Hodrick3Y, 'R2':rsquared},ignore_index=True)
print(TailStocksRET3Y_df)

# 5Y 
args.Lwindow = 60
fitdataset5Y = pd.DataFrame()
fitdataset5Y['date'] = fitdataset['date'][:-args.Lwindow]   
fitdataset5Y['vwretd'] = cal_aggHorizon_ret(fitdataset['vwretd'], args.Lwindow)
# fitdataset5Y = fitdataset5Y.merge(rf_dataset, how='left', on='date')
for col in regressors:
    fitdataset5Y[col] = fitdataset[col][:-args.Lwindow]
TailStocksRET5Y_df = pd.DataFrame(columns=['Rz.', 'Coeff.', 't-stat.', 'R2'])
for regressor in regressors:
    fitY = fitdataset5Y['vwretd'] #- fitdataset5Y['rf']
    fitX = fitdataset5Y[regressor]
    fitX = sm.add_constant(fitX)
    rgmodel = sm.OLS(fitY, fitX).fit()
    coeff = np.round(rgmodel.params[f'{regressor}'] / 5 * 100, 2)
    t_stat = np.round(rgmodel.tvalues[f'{regressor}'], 2)
    rsquared = np.round(rgmodel.rsquared * 100, 1)
    # Hodrick standard error
    Ysub_H = np.array(fitY); Ysub_1 = np.array(fitdataset1M['vwretd']); X = np.array(fitX); H = args.Lwindow
    HodrickV = computeHodrick1992VCovMatrix(Ysub_H, Ysub_1, X, H)
    t_Hodrick5Y = np.round(rgmodel.params[f'{regressor}'] / np.sqrt(HodrickV.forward(H)[1, 1]), 2)
    TailStocksRET5Y_df = TailStocksRET5Y_df.append({'Rz.':f'{regressor}', 'Coeff.':coeff, 't-stat.':t_Hodrick5Y, 'R2':rsquared},ignore_index=True)
print(TailStocksRET5Y_df)
TailStocksRet_df = pd.concat([TailStocksRET1M_df, TailStocksRET1Y_df.iloc[:,1:], TailStocksRET3Y_df.iloc[:,1:], TailStocksRET5Y_df.iloc[:,1:]], axis=1)
if args.isTest:
    args.save_dir = './logs/testUnivariatePred/'
else:
    args.save_dir = './logs/UnivariatePred/'
if not os.path.exists(args.save_dir):
    os.makedirs(args.save_dir)
TailStocksRet_df.to_csv(os.path.join(args.save_dir, f'HillUnivariatePred_{args.dataset_start}_{args.dataset_end}.csv'), index=False)

                      Rz.  Coeff.  t-stat.   R2
0                    Tail    3.55     1.78  0.4
1          Book-to-market    2.93     1.47  0.3
2   Default return spread    1.90     0.95  0.1
3    Default yield spread    3.97     1.99  0.6
4   Dividend payout ratio    1.99     1.00  0.1
5    Dividend price ratio    4.23     2.12  0.6
6    Earnings price ratio    2.58     1.29  0.2
7               Inflation   -2.84    -1.42  0.3
8        Long-term return    5.25     2.65  1.0
9         Long-term yield    0.55     0.27  0.0
10   Net equity expansion   -0.97    -0.49  0.0
11       Stock volatility   -1.41    -0.71  0.1
12            Term spread    2.06     1.03  0.1
13     Treasury-bill rate   -0.44    -0.22  0.0
                      Rz.  Coeff.  t-stat.   R2
0                    Tail    3.27     2.09  3.7
1          Book-to-market    3.23     1.61  3.7
2   Default return spread   -0.05    -0.07  0.0
3    Default yield spread    4.08     2.35  5.9
4   Dividend payout ratio    2.26     1.

In [7]:
print(TailStocksRet_df)

                      Rz.  Coeff.  t-stat.   R2  Coeff.  t-stat.   R2  Coeff.  \
0                    Tail    3.55     1.78  0.4    3.27     2.09  3.7    4.30   
1          Book-to-market    2.93     1.47  0.3    3.23     1.61  3.7    2.17   
2   Default return spread    1.90     0.95  0.1   -0.05    -0.07  0.0    0.04   
3    Default yield spread    3.97     1.99  0.6    4.08     2.35  5.9    2.20   
4   Dividend payout ratio    1.99     1.00  0.1    2.26     1.38  1.8    1.16   
5    Dividend price ratio    4.23     2.12  0.6    4.56     2.34  7.2    3.98   
6    Earnings price ratio    2.58     1.29  0.2    2.63     1.36  2.5    2.79   
7               Inflation   -2.84    -1.42  0.3   -1.67    -1.15  1.0    0.36   
8        Long-term return    5.25     2.65  1.0    1.99     3.35  1.4    0.80   
9         Long-term yield    0.55     0.27  0.0    2.06     1.16  1.5    3.14   
10   Net equity expansion   -0.97    -0.49  0.0   -1.35    -0.68  0.6   -0.08   
11       Stock volatility   