In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from arch import arch_model
from arch.univariate import GARCH, EWMAVariance
from sklearn import linear_model
import scipy.stats as stats
from statsmodels.regression.rolling import RollingOLS
import seaborn as sns
import warnings
import ast
warnings.filterwarnings("ignore")
pd.set_option("display.precision", 4)

## Functions

In [334]:
def summary_stat(df, annual_factor, q=0.05):
    """Annualized mean, volatility and Sharpe ratio for each column of ``df``.

    Parameters
    ----------
    df : DataFrame of periodic returns, one column per asset.
    annual_factor : scaling factor; the mean is multiplied by it and the
        standard deviation by its square root.
    q : accepted for signature compatibility; not used in the computation.

    Returns
    -------
    DataFrame indexed like ``df.columns`` with columns "mean", "volatility"
    and "Sharpe Ratio" (mean divided by volatility).
    """
    annual_mean = df.mean() * annual_factor
    annual_vol = df.std() * np.sqrt(annual_factor)
    # Column names contain a space, so they are passed through a dict.
    return pd.DataFrame().assign(**{
        "mean": annual_mean,
        "volatility": annual_vol,
        "Sharpe Ratio": annual_mean / annual_vol,
    })

def sub_summarize(df, year_list, annualize=12):
    """Annualized mean, volatility and Sharpe ratio of ``df`` over several windows.

    Parameters
    ----------
    df : Series of periodic returns whose index supports label slicing
        (``df[start:end]``), e.g. a DatetimeIndex sliced by year strings.
    year_list : iterable of (start, end) pairs; each pair defines one window.
    annualize : scaling factor; the mean is multiplied by it and the standard
        deviation by its square root.

    Returns
    -------
    Float DataFrame with columns "mean", "vol", "Sharpe" and one row per
    window, labelled with the pair's ``repr`` minus its outer brackets
    (e.g. ``"'1996', '2011'"``).
    """
    col_name = ["mean", "vol", "Sharpe"]
    labels, rows = [], []
    for y in year_list:
        window = df[y[0]: y[1]]
        mean = window.mean() * annualize
        vol = window.std() * np.sqrt(annualize)
        rows.append((mean, vol, mean / vol))
        labels.append(repr(y)[1:-1])
    # Build the frame once: growing it row by row with .loc on an empty frame
    # leaves every column with object dtype instead of float.
    return pd.DataFrame(rows, columns=col_name, index=labels, dtype=float)

In [348]:
def tailMetrics(ret_ex, ret, quantile=.05):
    """Tail-risk statistics per asset.

    Parameters
    ----------
    ret_ex : DataFrame of excess returns; used for the minimum and the quantile.
    ret : DataFrame of total returns; used for the drawdown and for the
        row index of the result.
    quantile : probability level passed to ``DataFrame.quantile``.

    Returns
    -------
    DataFrame indexed by ``ret.columns`` with columns "min return",
    "VaR (<quantile>)" and "Max Drawdown".
    """
    # Drawdown is measured on the compounded total-return path against its running peak.
    wealth = (1 + ret).cumprod()
    peak = wealth.cummax()
    underwater = wealth.sub(peak).div(peak)

    metrics = pd.DataFrame(index=ret.columns)
    metrics["min return"] = ret_ex.min()
    metrics[f'VaR ({quantile})'] = ret_ex.quantile(quantile)
    metrics['Max Drawdown'] = underwater.min()
    return metrics

In [364]:
def get_capm_matrics(targets, regressors, add_constant=True, annualize_factor=12):
    """Regress each column of ``targets`` on ``regressors`` with OLS.

    Parameters
    ----------
    targets : DataFrame; one regression is fitted per column.
    regressors : DataFrame of explanatory variables shared by all regressions.
    add_constant : when True an intercept is added via ``sm.add_constant`` and
        reported as "alpha".
    annualize_factor : multiplier applied to the intercept only.

    Returns
    -------
    DataFrame indexed by ``targets.columns`` holding "alpha" (only when
    ``add_constant``), one coefficient column per regressor, and "R-squared".
    """
    design = sm.add_constant(regressors) if add_constant else regressors.copy()
    factor_names = regressors.columns

    result = pd.DataFrame(index=targets.columns)
    for name in targets.columns:
        # Rows with missing values are dropped by statsmodels before fitting.
        fit = sm.OLS(targets[[name]], design, missing='drop').fit()
        if add_constant:
            result.loc[name, "alpha"] = fit.params['const'] * annualize_factor
        result.loc[name, factor_names] = fit.params[factor_names]
        result.loc[name, "R-squared"] = fit.rsquared

    return result

In [170]:
def summary_stats_bm(series, bm, annual_fac=12):
    """Performance summary of each column of ``series`` against a benchmark.

    Parameters
    ----------
    series : DataFrame of periodic returns, one column per strategy.
    bm : benchmark returns; selected with ``bm.loc[series.index]`` so it must
        contain every index label of ``series``.
    annual_fac : scaling factor for the mean (x factor), the volatility
        (x sqrt(factor)) and the regression intercept (x factor).

    Returns
    -------
    DataFrame indexed by ``series.columns`` with columns "Mean", "Vol",
    "Sharpe (Mean/Vol)", "alpha", "beta" and "Max Drawdown".
    """
    ss_df = pd.DataFrame(data=None)
    ss_df['Mean'] = series.mean() * annual_fac
    ss_df['Vol'] = series.std() * np.sqrt(annual_fac)
    ss_df['Sharpe (Mean/Vol)'] = ss_df['Mean'] / ss_df['Vol']

    X = sm.add_constant(bm.loc[series.index])
    alphas, betas = [], []
    for col in series.columns:
        params = sm.OLS(series[[col]], X).fit().params
        # params is label-indexed; integer keys such as params[0] rely on a
        # deprecated positional fallback, so positions are taken with .iloc.
        alphas.append(params.iloc[0] * annual_fac)
        betas.append(params.iloc[1])
    ss_df["alpha"] = alphas
    ss_df["beta"] = betas

    # Max drawdown of the compounded return path relative to its running peak.
    cum_ret = (1 + series).cumprod()
    rolling_max = cum_ret.cummax()
    drawdown = (cum_ret - rolling_max) / rolling_max
    ss_df['Max Drawdown'] = drawdown.min()

    return ss_df

## 2 Analyzing GMO
### 2.1 Calculate the mean, volatility, and Sharpe ratio for GMWAX.

In [35]:
# Read the returns (sheet position 2) and the risk-free rate (sheet position 3)
# from the same workbook, then form GMWAX returns in excess of US3M.
gmo_workbook = "../data/gmo_analysis_data.xlsx"
ret, rf = (
    pd.read_excel(gmo_workbook, sheet_name=sheet_pos, index_col=0)
    for sheet_pos in (2, 3)
)
GMWAX_ex = ret["GMWAX"].sub(rf["US3M"]).dropna()

In [346]:
# Two sub-samples followed by the full sample, each as a (start, end) year pair.
year_list = [
    ("1996", "2011"),
    ("2012", "2022"),
    ("1996", "2022"),
]
sub_summarize(GMWAX_ex, year_list=year_list, annualize=12)

Unnamed: 0,mean,vol,Sharpe
"'1996', '2011'",0.0158,0.125,0.1266
"'2012', '2022'",0.0366,0.092,0.3982
"'1996', '2022'",0.0245,0.1123,0.2181


### 2.2 GMO believes a risk premium is compensation for a security’s tendency to lose money at “bad times”. For all three samples, analyze extreme scenarios by looking at

In [350]:
# Excess returns for every asset: subtract US3M row-wise and drop incomplete rows.
ret_ex = ret.sub(rf["US3M"], axis=0).dropna()
tailMetrics(ret_ex=ret_ex, ret=ret.dropna(), quantile=.05)

Unnamed: 0,min return,VaR (0.05),Max Drawdown
SPY,-0.1656,-0.08,-0.508
GMWAX,-0.1492,-0.0483,-0.3552


#### (a) Does GMWAX have high or low tail-risk as seen by these stats? 

* Low, compared to SPY: GMWAX has a smaller (less negative) minimum return, VaR, and max drawdown in the table above. (CVaR is not shown in that table.)

#### (b) Does that vary much across the two subsamples?

* Yes, GMO's VaR improves notably in the second subsample, and drawdown is also much lower in the second subsample. 

In [362]:
# Split excess and total returns into the two sub-samples.
sub1_ex, sub1_ret = ret_ex[:"2011"].dropna(), ret[:"2011"].dropna()
sub2_ex, sub2_ret = ret_ex["2012": "2022"], ret["2012": "2022"]
sub1 = tailMetrics(sub1_ex, sub1_ret, quantile=.05)
sub2 = tailMetrics(sub2_ex, sub2_ret, quantile=.05)

# Derive the row labels from each table's own index instead of hard-coding
# them, so a different column order or asset count cannot mislabel rows.
tail1 = pd.concat([
    sub1.rename(index=lambda asset: f"{asset}: 1996-2011"),
    sub2.rename(index=lambda asset: f"{asset}: 2012-2022"),
])
tail1

Unnamed: 0,min return,VaR (0.05),Max Drawdown
SPY: 1996-2011,-0.1656,-0.0802,-0.508
GMWAX: 1996-2011,-0.1492,-0.0598,-0.3552
SPY: 2012-2022,-0.1247,-0.0687,-0.2393
GMWAX: 2012-2022,-0.1187,-0.0397,-0.2168


### 2.3 For all three samples, regress excess returns of GMWAX on excess returns of SPY.

#### (a) Report the estimated alpha, beta, and r-squared.

In [365]:
# Full sample: GMWAX excess returns regressed on SPY excess returns.
summary = get_capm_matrics(
    targets=ret_ex[["GMWAX"]],
    regressors=ret_ex[["SPY"]],
    add_constant=True,
    annualize_factor=12,
)
summary

Unnamed: 0,alpha,SPY,R-squared
GMWAX,-0.017,0.5456,0.5777


In [366]:
# First sub-sample: same regression on the rows of sub1_ex.
summary = get_capm_matrics(
    targets=sub1_ex[["GMWAX"]],
    regressors=sub1_ex[["SPY"]],
    add_constant=True,
    annualize_factor=12,
)
print("subsample: 1996-2011")
summary

subsample: 1996-2011


Unnamed: 0,alpha,SPY,R-squared
GMWAX,-0.0058,0.5396,0.5071


In [367]:
# Second sub-sample: same regression on the rows of sub2_ex.
summary = get_capm_matrics(
    targets=sub2_ex[["GMWAX"]],
    regressors=sub2_ex[["SPY"]],
    add_constant=True,
    annualize_factor=12,
)
print("subsample: 2012-2022")
summary

subsample: 2012-2022


Unnamed: 0,alpha,SPY,R-squared
GMWAX,-0.0345,0.5622,0.7645


#### (b) Is GMWAX a low-beta strategy? Has that changed since the case?

* GMWAX's beta to SPY is around 0.54 to 0.56; for a multi-asset allocation strategy, we can consider it a low-beta strategy. 
* In each subsample, beta does not change much, beta varies from 0.54-0.56. 

#### (c) Does GMWAX provide alpha? Has that changed across the subsamples?

In each subsample, alpha < 0, so GMWAX did not provide alpha in either subsample. 

## 3 Forecast Regressions
### 3.1 Consider the lagged regression

**using total return**

In [368]:
# Shift the signals down one row so each row pairs the previous period's
# signal values with the current SPY return, then keep the dates present in both.
factor = (
    pd.read_excel("../data/gmo_analysis_data.xlsx", sheet_name=1, index_col=0)
    .shift()
    .merge(ret[["SPY"]], left_index=True, right_index=True)
)
factor.head()

Unnamed: 0,DP,EP,US10Y,SPY
1993-02-28,,,,0.0107
1993-03-31,2.82,4.44,6.03,0.0224
1993-04-30,2.77,4.41,6.03,-0.0256
1993-05-31,2.82,4.44,6.05,0.027
1993-06-30,2.81,4.38,6.16,0.0037


In [369]:
# SPY regressed on the lagged DP column only.
summary1 = get_capm_matrics(
    targets=factor[["SPY"]],
    regressors=factor[["DP"]],
    add_constant=True,
    annualize_factor=12,
)
summary1

Unnamed: 0,alpha,DP,R-squared
SPY,-0.1129,0.0094,0.0094


In [370]:
# SPY regressed on the lagged EP column only.
summary2 = get_capm_matrics(
    targets=factor[["SPY"]],
    regressors=factor[["EP"]],
    add_constant=True,
    annualize_factor=12,
)
summary2

Unnamed: 0,alpha,EP,R-squared
SPY,-0.0712,0.0032,0.0086


In [371]:
# SPY regressed on all three lagged columns: DP, EP and US10Y.
summary3 = get_capm_matrics(
    targets=factor[["SPY"]],
    regressors=factor[["DP", "EP", "US10Y"]],
    add_constant=True,
    annualize_factor=12,
)
summary3

Unnamed: 0,alpha,DP,EP,US10Y,R-squared
SPY,-0.1792,0.008,0.0027,-0.001,0.0163


### 3.2 For each of the three regressions, let’s try to utilize the resulting forecast in a trading strategy.

In [157]:
# Fitted values from each regression. In the summary tables, column 0 is the
# alpha (stored multiplied by 12, hence the division) and the following
# columns are the slope coefficients.
predict1, predict2 = (
    (factor[[signal]].dropna() * fit.iloc[0, 1] + fit.iloc[0, 0] / 12)
    .rename(columns={signal: "SPY"})
    for signal, fit in (("DP", summary1), ("EP", summary2))
)
predict3 = (
    (factor[["DP", "EP", "US10Y"]].dropna() * summary3.iloc[0, 1:4]).sum(axis=1)
    + summary3.iloc[0, 0] / 12
).to_frame("SPY")

In [177]:
# For each forecast: scale it by 100 and multiply by the SPY column of `factor`;
# the three resulting columns are placed side by side and relabelled.
predict_all = pd.concat(
    [forecast.mul(100).mul(factor[["SPY"]])
     for forecast in (predict1, predict2, predict3)],
    axis=1,
)
predict_all.columns = ["DP", "EP", "3-Factor"]

In [211]:
# Skip the first row of both inputs; the benchmark is the first column of `ret`.
summary_stats_bm(series=predict_all.iloc[1:], bm=ret.iloc[1:, 0])

Unnamed: 0,Mean,Vol,Sharpe (Mean/Vol),alpha,beta,Max Drawdown
DP,0.1095,0.149,0.7348,0.0207,0.8611,-0.653
EP,0.1078,0.1286,0.8383,0.0322,0.7327,-0.3823
3-Factor,0.125,0.1456,0.8588,0.0451,0.775,-0.5221


### 3.3 GMO believes a risk premium is compensation for a security’s tendency to lose money at “bad times”. Let’s consider risk characteristics.

#### (a) calculate the monthly VaR for π = .05. Just use the quantile of the historic data for this VaR calculation.

In [195]:
# 5% historical quantile of each strategy followed by each asset in `ret`.
# Series.append was removed in pandas 2.0; pd.concat stacks the two Series.
pd.concat(
    [predict_all.dropna().quantile(.05), ret.quantile(.05)]
).to_frame("VaR(0.05)")

Unnamed: 0,VaR(0.05)
DP,-0.0523
EP,-0.0541
3-Factor,-0.0642
SPY,-0.0739
GMWAX,-0.0473


#### (b) The GMO case mentions that stocks under-performed short-term bonds from 2000-2011. Does the dynamic portfolio above under-perform the risk-free rate over this time?

* The dynamic portfolios have a higher mean return than the risk-free rate, but also a much higher volatility. 
* Judged by mean return, the dynamic portfolios outperformed the risk-free rate. Judged by the Sharpe ratio, however, they under-performed it. 

In [372]:
# Strategies and the risk-free columns side by side, restricted to 2000-2011.
summary_stat(
    df=predict_all.join(rf).dropna().loc["2000":"2011"],
    annual_factor=12,
)

Unnamed: 0,mean,volatility,Sharpe Ratio
DP,0.0393,0.1842,0.2135
EP,0.0373,0.1339,0.2784
3-Factor,0.0608,0.1574,0.3863
US3M,0.0231,0.0058,3.9866


#### (c) Based on the regression estimates, in how many periods do we estimate a negative risk premium?

In [402]:
# Subtract US3M from the first three columns (the strategy columns) on the
# dates shared with `rf`, then drop the first row.
predict_excess = (
    predict_all.merge(rf, left_index=True, right_index=True)
    .pipe(lambda merged: merged.iloc[:, :3].sub(merged["US3M"], axis=0))
    .iloc[1:]
)
predict_excess

Unnamed: 0,DP,EP,3-Factor
1993-03-31,0.0361,0.0160,0.0278
1993-04-30,-0.0453,-0.0234,-0.0358
1993-05-31,0.0437,0.0197,0.0338
1993-06-30,0.0037,0.0004,0.0022
1993-07-31,-0.0108,-0.0064,-0.0090
...,...,...,...
2022-06-30,-0.0460,-0.0787,-0.0636
2022-07-31,0.0538,0.0973,0.0821
2022-08-31,-0.0276,-0.0423,-0.0378
2022-09-30,-0.0518,-0.0956,-0.0741


In [403]:
# Share of periods (in %) in which each column of predict_excess is negative.
# The flags come from a boolean mask rather than overwriting predict_excess
# with 0/1 values, so the cell can be re-run and the data stays intact.
underperform_rate = (
    (predict_excess < 0).sum(axis=0) / predict_excess.shape[0] * 100
).to_frame("Underperformed (%)")
underperform_rate

Unnamed: 0,Underperformed (%)
DP,37.3596
EP,37.3596
3-Factor,37.0787


#### (d) Do you believe the dynamic strategy takes on extra risk??

* No, judging by the tail risk metrics and volatility of the dynamic strategies compared to SPY it does not seem like these strategies take on extra risk on the whole.

## 4 Out-of-Sample Forecasting
#### 4.1 Report the out-of-sample R2

In [431]:
# Rebuild `factor` from the signal sheet joined with the returns.
# NOTE(review): unlike section 3.1, no .shift() is applied to the signals in
# this cell, so DP/EP share the date of the SPY return they explain — confirm
# whether the signals should be lagged for a true out-of-sample forecast.
factor = pd.read_excel(
    "../data/gmo_analysis_data.xlsx", sheet_name=1, index_col=0
).join(ret)

# Expanding-window OLS of SPY on a constant, DP and EP; estimates start once
# 60 observations are available.
model = RollingOLS(
    endog=factor[["SPY"]],
    exog=sm.add_constant(factor[["DP", "EP"]]),
    window=factor.shape[0],
    min_nobs=60,
    expanding=True,
).fit()

# Move the estimates down one row so each date carries the previous row's coefficients.
params = model.params.shift()
params.tail()

Unnamed: 0,const,DP,EP
2022-06-30,0.0076,-0.0027,0.0014
2022-07-31,0.0071,-0.0022,0.0013
2022-08-31,0.0077,-0.0026,0.0013
2022-09-30,0.0073,-0.0023,0.0013
2022-10-31,0.0071,-0.0019,0.0011


In [432]:
# Forecast = lagged slopes times the signals, summed across signals, plus the lagged intercept.
predict_spy = (
    params[["DP", "EP"]]
    .mul(factor[["DP", "EP"]])
    .sum(axis=1)
    .add(params["const"])
)
# Sum of squared forecast errors.
predict_err = np.square(factor["SPY"].sub(predict_spy)).sum()
predict_err

0.6297038901908676

In [435]:
# Baseline forecast: expanding mean of SPY (at least 60 observations), moved down one row.
zero_pre_spy = factor[["SPY"]].expanding(min_periods=60).mean().shift()
# Sum of squared errors of the baseline forecast.
zero_pre_err = np.square(factor[["SPY"]].sub(zero_pre_spy)).sum()

In [436]:
# zero_pre_err is a Series labelled by column name, so its single value is
# taken by position with .iloc; an integer key like [0] relies on a deprecated
# positional fallback.
print("R-squared: " + str(1 - predict_err/zero_pre_err.iloc[0]))

R-squared: -0.032209205517152784


###  4.2 Re-do problem 3.2 using this OOS forecast. How much better/worse is the OOS strategy compared to the in-sample version of 3.2?

* Worse. 

In [441]:
# Scale the forecast by 100 and multiply by the SPY return column.
oos_port = predict_spy.mul(100).mul(ret["SPY"]).to_frame("oos_portfolio")
# Evaluate from row position 60 onward against the first column of `ret`.
summary_stats_bm(series=oos_port.iloc[60:], bm=ret.iloc[60:, 0])

Unnamed: 0,Mean,Vol,Sharpe (Mean/Vol),alpha,beta,Max Drawdown
oos_portfolio,0.0971,0.2608,0.3723,0.0548,0.4935,-0.8028


### 4.3 Re-do problem 3.3 using this OOS forecast.

* The OOS forecast strategy has higher risk than the in-sample forecast strategies. 

In [442]:
# 5% historical quantile of the OOS portfolio followed by the in-sample strategies.
# Series.append was removed in pandas 2.0; pd.concat stacks the two Series.
tail_risk = pd.concat([
    oos_port.dropna().quantile(.05),
    predict_all.dropna().quantile(.05),
]).to_frame("VaR(0.05)")
tail_risk

Unnamed: 0,VaR(0.05)
oos_portfolio,-0.083
DP,-0.0523
EP,-0.0541
3-Factor,-0.0642


In [443]:
# Compare the OOS portfolio with the in-sample strategies on the rows where all are available.
summary_stat(df=oos_port.join(predict_all).dropna(), annual_factor=12)

Unnamed: 0,mean,volatility,Sharpe Ratio
oos_portfolio,0.0971,0.2608,0.3723
DP,0.0854,0.1519,0.5623
EP,0.0848,0.1301,0.6514
3-Factor,0.1063,0.1501,0.7079


In [455]:
# OOS portfolio joined with the risk-free columns; the portfolio column is
# replaced by its value minus US3M.
excess = (
    oos_port.join(rf)
    .dropna()
    .assign(oos_portfolio=lambda joined: joined["oos_portfolio"] - joined["US3M"])
)

In [457]:
# Share of periods (in %) in which the OOS portfolio's excess value is negative.
oos_rate = ((excess[["oos_portfolio"]] < 0).sum() /
            excess.shape[0]).to_frame("Underperformed (%)") * 100
# DataFrame.append was removed in pandas 2.0; pd.concat stacks the two tables.
pd.concat([underperform_rate, oos_rate])

Unnamed: 0,Underperformed (%)
DP,37.3596
EP,37.3596
3-Factor,37.0787
oos_portfolio,37.7104
