In [15]:
# Data Extraction
import pandas as pd 
import numpy as np
# Load both excess-return sheets in one pass: passing a list of sheet names
# makes read_excel return a {sheet_name: DataFrame} dict, so the workbook is
# opened and parsed once and the path is spelled in one place.
DATA_FILE = './midterm_2_data.xlsx'
FACTOR_SHEET = 'factors (excess returns)'
PORTFOLIO_SHEET = 'portfolios (excess returns)'

excess_return_sheets = pd.read_excel(DATA_FILE, sheet_name=[FACTOR_SHEET, PORTFOLIO_SHEET])
factors = excess_return_sheets[FACTOR_SHEET].set_index('Date')
portfolio = excess_return_sheets[PORTFOLIO_SHEET].set_index('Date')

# Number of return observations per year used to annualize statistics.
annualization_factor = 12

import statsmodels.api as sm


def time_series_regression(portfolio, factors, FF3F = False, resid = False, annualization_factor = 12):
    """Regress every column of ``portfolio`` on ``factors`` (plus an intercept) with OLS.

    Parameters
    ----------
    portfolio : pd.DataFrame
        One left-hand-side series per column.
    factors : pd.DataFrame
        Regressors; a constant column is added via ``sm.add_constant``.
    FF3F : bool, default False
        When True, also report the 2nd and 3rd factor coefficients under the
        labels 'Value beta' and 'Momentum beta'. The labels are positional:
        they refer to the 2nd and 3rd columns of ``factors`` whatever their names.
    resid : bool, default False
        When True, return the regression residuals instead of the report.
    annualization_factor : float, default 12
        Periods per year used to scale the information ratio, Treynor ratio
        and tracking error.

    Returns
    -------
    pd.DataFrame
        If ``resid`` is False: one row per portfolio column with 'alpha_hat'
        (not annualized), 'Market beta', optionally 'Value beta' and
        'Momentum beta', then 'info_ratio', 'treynor_ratio', 'R-squared' and
        'Tracking Error'.
        If ``resid`` is True: the residual series, one column per portfolio.
    """
    rhs = sm.add_constant(factors)

    report_rows = []
    bm_residuals = pd.DataFrame(columns=portfolio.columns)

    for portf in portfolio.columns:
        lhs = portfolio[portf]
        # missing='drop' discards observations with missing values before fitting.
        res = sm.OLS(lhs, rhs, missing='drop').fit()

        alpha = res.params['const']
        # Coefficients are labelled by factor name, so positions must be read
        # with .iloc; plain params[1] relies on a deprecated positional fallback.
        market_beta = res.params.iloc[1]
        resid_vol = res.resid.std()

        row = {'alpha_hat': alpha, 'Market beta': market_beta}
        if FF3F:
            row['Value beta'] = res.params.iloc[2]
            row['Momentum beta'] = res.params.iloc[3]

        row['info_ratio'] = np.sqrt(annualization_factor) * alpha / resid_vol
        # The Treynor numerator uses the mean of the full column, not only the
        # rows that survived missing='drop'.
        row['treynor_ratio'] = annualization_factor * portfolio[portf].mean() / market_beta
        row['R-squared'] = res.rsquared
        row['Tracking Error'] = resid_vol * np.sqrt(annualization_factor)
        report_rows.append(row)

        if resid:
            bm_residuals[portf] = res.resid

    if resid:
        return bm_residuals

    # Build the report once from the collected rows instead of growing it cell by cell.
    return pd.DataFrame(report_rows, index=portfolio.columns)

In [16]:
# Time-series regression of each portfolio on the three factors.
ts_FF = time_series_regression(portfolio, factors, FF3F=True)
# The helper reports the per-period intercept; scale it to an annual figure with
# the notebook-wide constant rather than a repeated literal.
ts_FF['alpha_hat'] *= annualization_factor
ts_FF[['alpha_hat', 'Market beta', 'Value beta', 'Momentum beta', 'R-squared']]

Unnamed: 0,alpha_hat,Market beta,Value beta,Momentum beta,R-squared
NoDur,0.029253,0.739522,0.20458,0.049333,0.617919
Durbl,0.010734,1.271865,0.173595,-0.320023,0.613493
Manuf,-0.000996,1.049482,0.197462,-0.036704,0.870268
Enrgy,-0.015117,0.992222,0.637006,0.07517,0.465602
HiTec,0.028207,1.154959,-0.637135,-0.140638,0.829498
Telcm,0.003506,0.837326,0.094363,-0.084518,0.588052
Shops,0.026739,0.946928,-0.042222,-0.015005,0.742161
Hlth,0.031862,0.757605,-0.119928,0.074058,0.580514
Utils,0.01371,0.527879,0.353033,0.108622,0.342654
Other,-0.01978,1.115433,0.426753,-0.048678,0.910098


In [17]:
# Preview the first rows of the time-series report with every column,
# including the ratio and tracking-error diagnostics.
ts_FF.head()

Unnamed: 0,alpha_hat,Market beta,Value beta,Momentum beta,info_ratio,treynor_ratio,R-squared,Tracking Error
NoDur,0.029253,0.739522,0.20458,0.049333,0.331568,0.134449,0.617919,0.088227
Durbl,0.010734,1.271865,0.173595,-0.320023,0.063625,0.080186,0.613493,0.168711
Manuf,-0.000996,1.049482,0.197462,-0.036704,-0.015822,0.085455,0.870268,0.062979
Enrgy,-0.015117,0.992222,0.637006,0.07517,-0.091485,0.08936,0.465602,0.165238
HiTec,0.028207,1.154959,-0.637135,-0.140638,0.294943,0.086957,0.829498,0.095634


In [18]:
# Cross-sectional regression: annualized mean excess return of each portfolio
# regressed on its estimated time-series betas.
mean_portfolios = (portfolio.mean() * annualization_factor).to_frame('Mean Portfolio excess returns')
CS = time_series_regression(
    mean_portfolios,
    ts_FF.loc[:, ['Market beta', 'Value beta', 'Momentum beta']],
    FF3F=True,
).rename(columns={
    # Rename by label so a change in the helper's column order cannot silently
    # mislabel the coefficients. The left-hand side is already annualized, so
    # the intercept is annual.
    'alpha_hat': 'Annualized Intercept',
    'Market beta': 'Market regression coefficient',
    'Value beta': 'Value regression coefficients',
    'Momentum beta': 'Momentum regression coefficients',
})
# NOTE(review): 'info_ratio', 'treynor_ratio' and 'Tracking Error' come from a helper
# that applies annualization scaling meant for periodic returns; they are not
# meaningful for this cross-sectional fit.


In [19]:
# Display the cross-sectional regression report (intercept, slope coefficients, R-squared).
CS

Unnamed: 0,Annualized Intercept,Market regression coefficient,Value regression coefficients,Momentum regression coefficients,info_ratio,treynor_ratio,R-squared,Tracking Error
Mean Portfolio excess returns,0.063716,0.031992,-0.015767,0.030301,23.081584,34.025053,0.366198,0.033126


In [20]:
# Annualized factor premia.

# Time-series estimate: the sample mean of each factor's excess return, scaled by 12.
factors.mean() * 12 

MKT    0.083853
HML    0.025028
UMD    0.061692
dtype: float64

In [21]:
# Cross-sectional estimate of the factor premia: the slope coefficients from
# regressing the mean portfolio excess returns on the estimated betas.
CS[['Market regression coefficient','Value regression coefficients',
    'Momentum regression coefficients']]

Unnamed: 0,Market regression coefficient,Value regression coefficients,Momentum regression coefficients
Mean Portfolio excess returns,0.031992,-0.015767,0.030301


#### Cross Sectional Test 
- Regress the mean portfolio excess returns on the betas estimated in the time-series regressions; the resulting coefficients are the cross-sectional factor premia.

In [22]:
# Residuals of the cross-sectional regression (one per portfolio).
resid = time_series_regression(
    mean_portfolios,
    ts_FF.loc[:, ['Market beta', 'Value beta', 'Momentum beta']],
    FF3F=False,
    resid=True,
)

# Mean absolute pricing error: time-series alphas vs. cross-sectional residuals.
# .iloc[0] reads the single column's mean by position; plain [0] on a
# label-indexed Series relies on a deprecated positional fallback.
# NOTE(review): values are annualized decimal returns (e.g. 0.018 = 1.8%) even
# though the column label says '%'.
mae = pd.DataFrame(
    {'MAE (%)': [ts_FF['alpha_hat'].abs().mean(), resid.abs().mean().iloc[0]]},
    index=['TS', 'CS'],
)
mae

Unnamed: 0,MAE (%)
TS,0.01799
CS,0.007915


- CS R Squared = 0.366
- TS R Squared = 0.656
- However, the TS R-squared is not important for judging the pricing model, while the CS R-squared is: a higher CS R-squared suggests a better fit of the model.

In [23]:
# Average R-squared of the time-series regressions across the portfolios.
ts_FF['R-squared'].mean()

0.6560260105076539

In [28]:
# Relabel the beta columns with the factor names so they align with
# factors.mean() when multiplied column-wise.
factors_ts = ts_FF.rename(
    columns={'Market beta': 'MKT', 'Value beta': 'HML', 'Momentum beta': 'UMD'}
)
# Time-series implied premium per portfolio: sum over factors of
# beta * annualized mean factor excess return.
factors_ts[['MKT', 'HML', 'UMD']].mul(factors.mean()).mul(12).sum(axis=1)

NoDur    0.070175
Durbl    0.091252
Manuf    0.090680
Enrgy    0.103782
HiTec    0.072225
Telcm    0.067360
Shops    0.077421
Hlth     0.065095
Utils    0.059801
Other    0.101211
dtype: float64

In [33]:
# Relabel the cross-sectional slope coefficients with the factor names.
factors_cs = CS.rename(
    columns={
        'Market regression coefficient': 'MKT',
        'Value regression coefficients': 'HML',
        'Momentum regression coefficients': 'UMD',
    }
)
# Per-factor contribution to each portfolio's predicted premium:
# time-series beta times the cross-sectional premium of that factor.
factors_ts[['MKT', 'HML', 'UMD']] * factors_cs[['MKT', 'HML', 'UMD']].values[0]

Unnamed: 0,MKT,HML,UMD
NoDur,0.023659,-0.003226,0.001495
Durbl,0.04069,-0.002737,-0.009697
Manuf,0.033575,-0.003113,-0.001112
Enrgy,0.031744,-0.010043,0.002278
HiTec,0.03695,0.010045,-0.004261
Telcm,0.026788,-0.001488,-0.002561
Shops,0.030295,0.000666,-0.000455
Hlth,0.024238,0.001891,0.002244
Utils,0.016888,-0.005566,0.003291
Other,0.035685,-0.006728,-0.001475


In [34]:
# Cross-sectional predicted premium per portfolio: intercept plus the sum of
# beta * cross-sectional factor premium.
# .iloc[0] reads the single intercept by position; plain [0] on a label-indexed
# Series relies on a deprecated positional fallback.
predicted_cs = (
    factors_cs['Annualized Intercept'].iloc[0]
    + (factors_ts[['MKT', 'HML', 'UMD']] * factors_cs[['MKT', 'HML', 'UMD']].values[0]).sum(axis=1)
)
# Portfolio with the largest predicted premium.
predicted_cs.nlargest(1).to_frame('CS Predicted Premium with intercept')

Unnamed: 0,CS Predicted Premium with intercept
HiTec,0.10645


# RETURNS OVER TIME

Based on the concept of time diversification, the longer the investment horizon, the higher the Sharpe ratio. If returns can be treated as iid, then each period's return acts like an independent asset. Therefore, as the horizon lengthens there are more independent assets and thus more diversification, leading to a safer investment in the long run.
- However, the investment can be riskier than this suggests, because the argument assumes a log-return model and does not account for tail behavior.

In [11]:
from scipy.stats import norm 
import numpy as np 
def prob(mu, sigma, h): 
    """Return the standard-normal CDF evaluated at ``sqrt(h) * mu / sigma``.

    Parameters
    ----------
    mu : float
        Mean of the return series.
    sigma : float
        Volatility of the return series, in the same time units as ``mu``.
    h : float
        Horizon, in the time units of ``mu`` and ``sigma``.
    """
    horizon_scaled_mean = np.sqrt(h) * mu
    return norm.cdf(horizon_scaled_mean / sigma)



In [20]:
# Add the risk-free rate back to each factor's excess return, then convert the
# resulting simple returns to log returns.
# NOTE(review): `rf` is not defined in any cell visible here; it must be loaded
# (with an 'RF' column aligned on the same dates) before this cell can run on a
# fresh kernel.
factors_cols = list(factors.columns)
total_factors = factors.copy()
for factor in factors_cols:
    total_factors[factor] += rf['RF']
factors_log = np.log(1 + total_factors)
factors_log

Unnamed: 0_level_0,MKT,HML,UMD
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1980-01-31,0.061189,0.025180,0.080197
1980-02-29,-0.003305,0.014889,0.084065
1980-03-31,-0.124317,0.001998,-0.087084
1980-04-30,0.050978,0.022935,0.008266
1980-05-31,0.058929,0.011830,-0.003105
...,...,...,...
2023-04-30,0.009554,0.003095,0.019508
2023-05-31,0.007075,-0.076449,-0.002704
2023-06-30,0.066349,0.001399,-0.019897
2023-07-31,0.035946,0.044591,-0.035938


In [21]:
def portfolio_metrics_basic(returns: pd.DataFrame, annualization_factor: float) -> pd.DataFrame: 
    """Summarize each column of ``returns`` with annualized mean, volatility and Sharpe ratio.

    Parameters
    ----------
    returns : pd.DataFrame
        One return series per column.
    annualization_factor : float
        Periods per year; the mean is scaled by it and the standard deviation
        by its square root.

    Returns
    -------
    pd.DataFrame
        Indexed by the columns of ``returns``, with one column each for the
        annualized return, annualized volatility and their ratio.
    """
    annual_mean = annualization_factor * returns.mean()
    annual_vol = np.sqrt(annualization_factor) * returns.std()
    sharpe = annual_mean / annual_vol

    return pd.DataFrame(
        {
            'Annnualized Return': annual_mean.values,
            'Annnualized Vol': annual_vol.values,
            'Annnualized SR': sharpe.values,
        },
        index=returns.columns,
    )
# Annualized summary statistics of the MKT-minus-HML log-return spread.
annualization_factor = 12
log_spread = factors_log['MKT'].sub(factors_log['HML'])
metrics = portfolio_metrics_basic(log_spread.to_frame('log_spread'), annualization_factor)
metrics

Unnamed: 0,Annnualized Return,Annnualized Vol,Annnualized SR
log_spread,0.052137,0.209933,0.248351


In [22]:
# Evaluate prob() for the MKT-minus-HML log spread with h=5; the mean and
# volatility passed in are annualized, so h is measured in years.
prob(metrics.loc['log_spread', 'Annnualized Return'], metrics.loc['log_spread', 'Annnualized Vol'], 5)

0.7106656048930736