# Imports

In [None]:
import os
import sys
import pandas as pd
from scipy.stats import norm, chi2
import statsmodels.api as sm
import numpy as np
from functools import partial
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

# Put the two directories above the working directory at the front of sys.path
# before importing the `cmds` helper package (presumably located in one of them).
current_dir = os.getcwd()
parent_dir = os.path.abspath(os.path.join(current_dir, os.pardir))
grandparent_dir = os.path.abspath(os.path.join(parent_dir, os.pardir))
# Each insert goes to position 0, so grandparent_dir ends up ahead of parent_dir.
for _search_root in (parent_dir, grandparent_dir):
    sys.path.insert(0, _search_root)
import cmds.portfolio_management_helper as pmh

# Plot defaults: matplotlib's seaborn-whitegrid style sheet, plus size and color
# constants (not referenced in the cells visible here).
plt.style.use("seaborn-v0_8-whitegrid")
PLOT_WIDTH, PLOT_HEIGHT = 8, 5
COLORS = ["blue", "red", "orange"]

# Silence every warning category for the rest of the session.
warnings.filterwarnings('ignore')
# Render floats in pandas output with four decimal places.
pd.options.display.float_format = "{:.4f}".format
# Short alias for matplotlib's runtime configuration mapping.
p = plt.rcParams

# IPython magics (notebook only): draw figures inline, and reload imported
# modules automatically before each cell runs.
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [None]:
# Workbook locations, built by plain string concatenation from parent_dir.
data_path = parent_dir + '/data/'
ff_file_name = data_path + 'momentum_data.xlsx'
barn_file_name = data_path + 'barnstable_analysis_data.xlsx'

# Every sheet of the momentum workbook is read with the same file and index
# settings; only the sheet name changes from call to call.
_read_ff_sheet = partial(pmh.read_excel_default, ff_file_name,
                         index_col='Date', parse_dates=True)
excess_ff_factors = _read_ff_sheet(sheet_name='factors (excess returns)')
excess_momentum = _read_ff_sheet(sheet_name='momentum (excess returns)')
total_deciles = _read_ff_sheet(sheet_name='deciles (total returns)')
total_size_sorts = _read_ff_sheet(sheet_name='size_sorts (total returns)')
rf_rate = _read_ff_sheet(sheet_name='risk-free rate')

# Barnstable workbook: the 'data' sheet is indexed by a lower-case 'date' column.
barn_rets = pmh.read_excel_default(barn_file_name, sheet_name='data',
                                   index_col='date', parse_dates=True)

# Summary Tables

## Single Timeframe

In [None]:
# Annualized performance and tail-risk statistics over the full sample,
# transposed for display.
# NOTE(review): `factors` is not assigned in the cells shown here - confirm it
# is defined before this cell runs.
_single_period_columns = [
    'Annualized Mean',
    'Annualized Vol',
    'Annualized Sharpe',
    'Annualized Historical VaR',
    'Annualized Historical CVaR',
]
pmh.calc_summary_statistics(
    factors,
    annual_factor=12,
    correlations=False,
    provided_excess_returns=True,
    keep_columns=_single_period_columns,
).T

## Multiple Timeframes (with Correlations!)

In [None]:
# Same statistics computed separately for each sub-sample, with correlations
# against MKT and HML requested as well.
# NOTE(review): `ff_factors` is not assigned in the cells shown here - confirm
# it is defined before this cell runs.
_sample_periods = {
    '1927-2024': ['1927', '2024'],
    '1927-1993': ['1927', '1993'],
    '1994-2008': ['1994', '2008'],
    '2009-2024': ['2009', '2024'],
}
summary_table = pmh.calc_summary_statistics(
    ff_factors,
    annual_factor=12,
    provided_excess_returns=True,
    timeframes=_sample_periods,
    correlations=['MKT', 'HML'],
    keep_columns=['Annualized Mean', 'Annualized Vol', 'Annualized Sharpe', 'Skewness', 'Correlation'],
)
# Display only the rows whose label mentions UMD.
_is_umd_row = summary_table.index.str.contains('UMD')
summary_table.loc[_is_umd_row]

## Extreme Re-Formatting :)

In [None]:
# Excess return of SPX over TB1M, in levels.
barn_rets['SPX-XS'] = barn_rets['SPX'] - barn_rets['TB1M']
# Add a LOG_-prefixed column holding log(1 + r) for each of the three series.
# NOTE(review): LOG_SPX-XS is log(1 + excess return), which is not identical to
# LOG_SPX - LOG_TB1M - confirm which definition is intended.
for _col in ('SPX', 'TB1M', 'SPX-XS'):
    barn_rets['LOG_' + _col] = np.log(1 + barn_rets[_col])

# Initial Summary Table: annualized mean and vol of every barn_rets column,
# computed once per sample period.
_barn_periods = {
    '1965-1999': ['1965', '1999'],
    '2000-2024': ['2000', '2024'],
    '1926-2024': ['1926', '2024'],
}
barn_summary_table = pmh.calc_summary_statistics(
    barn_rets,
    annual_factor=12,
    provided_excess_returns=True,
    timeframes=_barn_periods,
    correlations=False,
    keep_columns=['Annualized Mean', 'Annualized Vol'],
)

# Stripping out Quadrants: classify every summary row by substrings of its
# label - log vs. level series, and which sample period it belongs to.
_row_labels = barn_summary_table.index
_is_log = _row_labels.str.contains('LOG')
_in_1965 = _row_labels.str.contains('1965')
_in_2000 = _row_labels.str.contains('2000')
_in_1926 = _row_labels.str.contains('1926')

logs_1965 = barn_summary_table.loc[_is_log & _in_1965]
logs_2000 = barn_summary_table.loc[_is_log & _in_2000]
logs_1926 = barn_summary_table.loc[_is_log & _in_1926]

levels_1965 = barn_summary_table.loc[~_is_log & _in_1965]
levels_2000 = barn_summary_table.loc[~_is_log & _in_2000]
levels_1926 = barn_summary_table.loc[~_is_log & _in_1926]

# Trimming Names for desired format: keep the first whitespace-separated token
# of each label; for the log frames also drop the prefix before the underscore.
for frame in (levels_1965, levels_2000, levels_1926):
    frame.index = [label.split()[0] for label in frame.index]
for frame in (logs_1965, logs_2000, logs_1926):
    frame.index = [label.split()[0].split('_')[1] for label in frame.index]

# Multi-Index Header & Combining Quadrants
# Two-level column header: sample period on top, statistic name underneath.
_period_labels = ['1965-1999', '2000-2024', '1926-2024']
columns = pd.MultiIndex.from_product([_period_labels, levels_1965.columns])

# Place the three periods side by side, then swap in the two-level header.
levels = pd.concat([levels_1965, levels_2000, levels_1926], axis=1)
logs = pd.concat([logs_1965, logs_2000, logs_1926], axis=1)
levels.columns = columns
logs.columns = columns

# Stack levels above logs under an outer row key and show values as percentages.
_levels_and_logs = pd.concat({'Levels': levels, 'Logs': logs})
_levels_and_logs.style.format('{:.2%}')

# Linear Factor Pricing Models (LFPM)

## Time Series Regression

### Single Dependent Variable

### Iterative Regression (Multiple Dependent Variables)

In [None]:
# One time-series regression per portfolio on the factor set, using data from
# 1981 onward.
# NOTE(review): `pf_excess_rets` and `factors` are not assigned in the cells
# shown here - confirm they are defined before this cell runs.
_sample_start = '1981'
_capm_columns = [
    'Alpha', 'Beta', 'R-Squared',
    'Annualized Treynor Ratio', 'Annualized Information Ratio',
    'Annualized Tracking Error',
]
capm_ts = pmh.calc_iterative_regression(
    pf_excess_rets.loc[_sample_start:],
    factors.loc[_sample_start:],
    warnings=False,
    keep_columns=_capm_columns,
)
display(capm_ts)

### Mean Absolute Error Test

**Note:** This MAE test is different from the CS MAE test because we measure the MAE of the *alphas* across the TS regressions.

In [None]:
# Mean absolute annualized alpha across the time-series regressions, shown as a
# one-cell table.
# NOTE(review): the column is labelled 'MAE (%)' but the value is not rescaled
# here - confirm the units of 'Annualized Alpha'.
_capm_mae = capm_ts['Annualized Alpha'].abs().mean()
pd.DataFrame(_capm_mae, index=['CAPM'], columns=['MAE (%)'])

In [None]:
# Fama-French 5-Factor Test
FF5F = ['MKT', 'SMB', 'HML', 'RMW', 'CMA']
ff5f_ts_test = pmh.calc_iterative_regression(portfolios, factors[FF5F], annual_factor=12, intercept=True,
                                             keep_columns=['Annualized Alpha', 'R-Squared'])
display(ff5f_ts_test)

# Summarize the absolute annualized alphas across the regressions.
# The column is extracted first so that no f-string has to reuse its own quote
# character inside a replacement field (a SyntaxError before Python 3.12).
# Each line is printed separately so that source indentation from a backslash
# continuation can no longer leak into the output as trailing spaces.
abs_alpha = ff5f_ts_test['Annualized Alpha'].abs()
print(f'Mean-Absolute-Error: {abs_alpha.sum() / len(ff5f_ts_test):.2%}')
print(f'Min-Absolute-Error: {abs_alpha.idxmin()} - {abs_alpha.min():.2%}')
print(f'Max-Absolute-Error: {abs_alpha.idxmax()} - {abs_alpha.max():.2%}')

## Cross-Sectional Regression

### Mean Absolute Error Test

**Note:** This MAE test is different from the TS MAE test because here the MAE is the mean of the absolute *residuals* (pricing errors) of the cross-sectional regression.

In [None]:
def _cs_test(model_name, model_factors):
    """Run the cross-sectional regression of `portfolios` on `model_factors`.

    Returns the selected summary columns, transposed, for the model labelled
    `model_name`.
    """
    return pmh.calc_cross_section_regression(
        portfolios, model_factors, provided_excess_returns=True, annual_factor=12,
        name=model_name,
        keep_columns=['R-Squared', 'Annualized Eta', 'Annualized Lambda',
                      'TS Annualized MAE', 'CS Annualized MAE'],
    ).T


# NOTE(review): `AQR` and `FF3F` are not assigned in the cells shown here -
# confirm they are defined before this cell runs.
capm_cs_test = _cs_test('CAPM', factors['MKT'].to_frame())
aqr_cs_test = _cs_test('AQR', factors[AQR])
ff3f_cs_test = _cs_test('FF3F', factors[FF3F])
ff5f_cs_test = _cs_test('FF5F', factors[FF5F])
# Side-by-side comparison of the four models.
pd.concat([capm_cs_test, aqr_cs_test, ff3f_cs_test, ff5f_cs_test], axis=1)

### Bivariate Significance Test

In [None]:
# Calculating Residuals Covariance Matrix
# Regress each portfolio on 'Mkt-RF' (1981 onward) and keep only the fitted
# model's residuals, one column per portfolio.
resid_columns = []
for pf in pf_excess_rets.columns:
    r = pmh.calc_regression(pf_excess_rets.loc['1981':, pf], factors.loc['1981':, 'Mkt-RF'].to_frame('Mkt-RF'),
                            annual_factor=12, warnings=False, return_model=True, return_fitted_values=False)
    resid_columns.append(r.resid.to_frame(pf))
# Concatenate once instead of growing the frame inside the loop, which would
# re-copy every earlier column on each iteration. With no portfolios the result
# is an empty frame, as before.
resid = pd.concat(resid_columns, axis=1) if resid_columns else pd.DataFrame()

# Conducting the H-test that all alphas are jointly zero (requires the residuals
# computed above and the alphas from the iterative regression).
T = pf_excess_rets.loc['1981':].shape[0]
# Sharpe ratio of the factor at the data's native frequency; the sqrt(12)
# annualization is intentionally not applied, matching the non-annualized alpha.
mkt_excess = factors.loc['1981':, 'Mkt-RF']
SR = mkt_excess.mean() / mkt_excess.std()
Sigma = resid.cov()
Sigma_inv = pd.DataFrame(np.linalg.inv(Sigma), index=Sigma.index, columns=Sigma.columns)
alpha = capm_ts['Alpha']    # Not Annualized

# H = T * (1 + SR^2)^-1 * alpha' Sigma^-1 alpha
H = T * (1 + SR**2)**(-1) * (alpha @ Sigma_inv @ alpha)

print('H = {:.2f}'.format(H))
# Degrees of freedom equal the number of test portfolios; take it from the
# covariance matrix instead of hard-coding 25. The survival function avoids the
# cancellation in 1 - cdf when H is large.
n_assets = Sigma.shape[0]
pvalue = chi2.sf(H, df=n_assets)
print('p-value = {:.4f}'.format(pvalue))

# Probability of Under Performance

In [None]:
def prob_under(mu, sigma, c, h):
    """Return Phi(((c - mu) / sigma) * sqrt(h)), with Phi the standard normal CDF.

    In this notebook it is called with the mean and standard deviation of a
    log excess return series and ``c=0``.

    Args:
        mu: Per-period mean.
        sigma: Per-period standard deviation.
        c: Threshold compared against ``mu``.
        h: Horizon in periods; a scalar or a NumPy array (evaluated elementwise).

    Returns:
        The probability as a float, or an array shaped like ``h``.
    """
    z_one_period = (c - mu) / sigma
    return norm.cdf(z_one_period * np.sqrt(h))

# Mean and standard deviation of the LOG_SPX-XS column over 1965-1999.
_log_excess_1965_1999 = barn_rets.loc['1965':'1999', 'LOG_SPX-XS']
mu = _log_excess_1965_1999.mean()
sigma = _log_excess_1965_1999.std()

# One-period probability that the series falls below zero.
_one_period_prob = prob_under(mu, sigma, c=0, h=1)
print(f'From 1965-1999, Monthly:\n\tPr(SPX Returns < RF Returns) = {_one_period_prob:.2%}')

In [None]:
# Horizons 0, 1, ..., 359 periods and the matching under-performance probability.
x = np.arange(12 * 30)
y = prob_under(mu, sigma, c=0, h=x)

# Line chart of probability against horizon.
fig, ax = plt.subplots()
ax.plot(x, y)
ax.set_title(
    'Change in Probability of Under-Performance\nover Different Time Horizons\n(1965-1999)'
)
ax.set(
    xlabel='Holding Period\n(Months)',
    ylabel='Pr(SPX < RF)',
)