In [340]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from arch import arch_model
from arch.univariate import GARCH, EWMAVariance 
from sklearn import linear_model
import scipy.stats as stats
from statsmodels.regression.rolling import RollingOLS
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")
pd.set_option("display.precision", 4)
sns.set(rc={'figure.figsize':(15, 10)})

In [341]:
#Calculate mean, standard deviation and sharpe ratio
def mean_vol_sharpe(df,ann=12):
    mean = df.mean() * ann
    volatility = df.std() * np.sqrt(ann)
    sharpe_ratio = mean/volatility
    return pd.DataFrame({'mean': mean, 'volatility': volatility, 'sharpe_ratio': sharpe_ratio})

In [342]:
def tail_risk(df):
    tr_df = pd.DataFrame(data = None)
    tr_df['Min return'] = df.min()
    tr_df['VaR-5th'] = df.quantile(.05)
    cum_ret = (1 + df).cumprod()
    rolling_max = cum_ret.cummax()
    drawdown = (cum_ret - rolling_max) / rolling_max
    tr_df['Max Drawdown'] = drawdown.min()
    
    return tr_df

In [343]:
def reg_params(df, y_col, X_col, intercept = True, annual_fac=12):
    y = df[y_col]
    if intercept == True:
        X = sm.add_constant(df[X_col])
    else:
        X = df[X_col]
    
    model = sm.OLS(y, X, missing = 'drop').fit()
    reg_df = model.params.to_frame('Regression Parameters')
    reg_df.loc['r-squared'] = model.rsquared
    
    if intercept == True:
        reg_df.loc['const'] *= annual_fac
    
    return reg_df

In [344]:
def summary_stats_bm(series, bm, annual_fac=12):
    ss_df = pd.DataFrame(data = None, index = ['Summary Stats'])
    ss_df['Mean'] = series.mean() * annual_fac
    ss_df['Vol'] = series.std() * np.sqrt(annual_fac)
    ss_df['Sharpe (Mean/Vol)'] = ss_df['Mean'] / ss_df['Vol']
    
    y = series
    X = sm.add_constant(bm.loc[series.index])
    reg = sm.OLS(y,X).fit().params
    ss_df['alpha'] = reg[0] * annual_fac
    ss_df['beta'] = reg[1]
    
    cum_ret = (1 + series).cumprod()
    rolling_max = cum_ret.cummax()
    drawdown = (cum_ret - rolling_max) / rolling_max
    ss_df['Max Drawdown'] = drawdown.min()
    
    return round(ss_df, 4)

In [345]:
def OOS_r2(df, factors, start):
    y = df['SPY']
    X = sm.add_constant(df[factors])

    forecast_err, null_err = [], []

    for i,j in enumerate(df.index):
        if i >= start:
            currX = X.iloc[:i]
            currY = y.iloc[:i]
            reg = sm.OLS(currY, currX, missing = 'drop').fit()
            null_forecast = currY.mean()
            reg_predict = reg.predict(X.iloc[[i]])
            actual = y.iloc[[i]]
            forecast_err.append(reg_predict - actual)
            null_err.append(null_forecast - actual)
            
    RSS = (np.array(forecast_err)**2).sum()
    TSS = (np.array(null_err)**2).sum()
    
    return 1 - RSS/TSS

In [346]:
def OOS_strat(df, factors, start, weight):
    returns = []
    y = df['SPY']
    X = sm.add_constant(df[factors])

    for i,j in enumerate(df.index):
        if i >= start:
            currX = X.iloc[:i]
            currY = y.iloc[:i]
            reg = sm.OLS(currY, currX, missing = 'drop').fit()
            pred = reg.predict(X.iloc[[i]])
            w = pred * weight
            returns.append((df.iloc[i]['SPY'] * w)[0])

    df_strat = pd.DataFrame(data = returns, index = df.iloc[-(len(returns)):].index, columns = ['Strat Returns'])
    return df_strat

In [347]:
signals = pd.read_excel('gmo_analysis_data.xlsx',1, index_col=0)
gmo = pd.read_excel('gmo_analysis_data.xlsx',2, index_col=0)
rf = pd.read_excel('gmo_analysis_data.xlsx',3, index_col=0)
gmo_ex = gmo.dropna().subtract(rf["US3M"],axis=0).dropna()
gmo_ex.head(3)

Unnamed: 0,SPY,GMWAX
1996-11-30,0.0687,0.0409
1996-12-31,-0.0282,-0.0156
1997-01-31,0.0575,0.0104


In [348]:
signals.head(4)

Unnamed: 0,DP,EP,US10Y
1993-02-28,2.82,4.44,6.03
1993-03-31,2.77,4.41,6.03
1993-04-30,2.82,4.44,6.05
1993-05-31,2.81,4.38,6.16


## 2 Analyzing GMO

#### 2.1 Examine GMO’s performance. Use the risk-free rate to convert the total returns to excess returns
#### Calculate the mean, volatility, and Sharpe ratio for GMWAX. Do this for three samples:

- from inception through 2011
- 2012-present
- inception - present
Has the mean, vol, and Sharpe changed much since the case?

In [349]:
mean_vol_sharpe(gmo_ex.loc[:'2011',['GMWAX']])

Unnamed: 0,mean,volatility,sharpe_ratio
GMWAX,0.0158,0.125,0.1266


In [350]:
mean_vol_sharpe(gmo_ex.loc['2012':,['GMWAX']])

Unnamed: 0,mean,volatility,sharpe_ratio
GMWAX,0.0366,0.092,0.3982


In [351]:
mean_vol_sharpe(gmo_ex[['GMWAX']])

Unnamed: 0,mean,volatility,sharpe_ratio
GMWAX,0.0245,0.1123,0.2181


#### Ans:- GMO's performance 2012 onwards has improved significantly. The sharpe ratio has more than tripled, owing to the higher mean and lower volatility.

#### 2.2 GMO believes a risk premium is compensation for a security’s tendency to lose money at “bad times”. For all three samples, analyze extreme scenarios by looking at
- Min return
- 5th percentile (VaR-5th)
- Maximum drawdown

In [352]:
tail_risk(gmo_ex.loc[:'2011',['GMWAX','SPY']])

Unnamed: 0,Min return,VaR-5th,Max Drawdown
GMWAX,-0.1492,-0.0598,-0.4729
SPY,-0.1656,-0.0802,-0.56


In [353]:
tail_risk(gmo_ex.loc['2012':,['GMWAX','SPY']])

Unnamed: 0,Min return,VaR-5th,Max Drawdown
GMWAX,-0.1187,-0.0397,-0.226
SPY,-0.1247,-0.0687,-0.2481


In [354]:
tail_risk(gmo_ex[['GMWAX','SPY']])

Unnamed: 0,Min return,VaR-5th,Max Drawdown
GMWAX,-0.1492,-0.0483,-0.4729
SPY,-0.1656,-0.08,-0.56


#### (a) Does GMWAX have high or low tail-risk as seen by these stats?
#### (b) Does that vary much across the two subsamples?

#### Ans (a):- GMO has lower tail risk compared to SPY across all periods, across all statistics (Min Ret., VaR-5th and Max Drawdown).

#### Ans (b):- Yes, the Max-Drawdown has decreased significantly from 2012 to present.

#### 2.3 For all three samples, regress excess returns of GMWAX on excess returns of SPY.
- (a) Report the estimated alpha, beta, and r-squared.

In [355]:
gmo_ex_dropna = gmo_ex.dropna()

#### 1996-2011

In [356]:
reg_params(gmo_ex.loc[:'2011'], 'GMWAX', 'SPY')

Unnamed: 0,Regression Parameters
const,-0.0058
SPY,0.5396
r-squared,0.5071


#### 2012-present

In [357]:
reg_params(gmo_ex.loc['2012':], 'GMWAX', 'SPY')

Unnamed: 0,Regression Parameters
const,-0.0345
SPY,0.5622
r-squared,0.7645


#### Full Sample

In [358]:
reg_params(gmo_ex, 'GMWAX', 'SPY')

Unnamed: 0,Regression Parameters
const,-0.017
SPY,0.5456
r-squared,0.5777


- (b) Is GMWAX a low-beta strategy? Has that changed since the case?
- (c) Does GMWAX provide alpha? Has that changed across the subsamples?

#### Ans (b):- Yes, GMWAX is a low-beta strategy. The beta has not changed significantly across the subsamples, and stayed around 0.55

#### Ans (c):- No, GMWAX does not provide alpha. The alpha has stayed very close to 0 or slightly negative (~ 0.05% - 0.2%) throuhgout the subsamples.

## 3 Forecast Regressions

#### Consider the lagged regression, where the regressor, $(X,)$ is a period behind the target, $(r^{SPY} )$.
#### $r^{SPY} = α^{SPY,X} +(β^{SPY,X})^′ X_{t-1} + \epsilon^{SPY,X}_{t}$ ....(1)

#### 3.1 Estimate $(1)$ and report the $\mathbb{R}^2$, as well as the OLS estimates for $α$ and $β$. Do this for...
- X as a single regressor, the dividend-price ratio.
- X as a single regressor, the earnings-price ratio.
- X as three regressors, the dividend-price ratio, the earnings-price ratio, and the 10-year yield.
For each, report the r-squared.

In [359]:
signals_copy = signals.copy()
signals_copy['SPY'] = gmo['SPY']

In [360]:
signals = signals.shift()

signals['SPY'] = gmo['SPY']
signals.head()

Unnamed: 0,DP,EP,US10Y,SPY
1993-02-28,,,,0.0107
1993-03-31,2.82,4.44,6.03,0.0224
1993-04-30,2.77,4.41,6.03,-0.0256
1993-05-31,2.82,4.44,6.05,0.027
1993-06-30,2.81,4.38,6.16,0.0037


In [361]:
dp = reg_params(signals, 'SPY', 'DP')
dp

Unnamed: 0,Regression Parameters
const,-0.1129
DP,0.0094
r-squared,0.0094


In [362]:
ep = reg_params(signals, 'SPY', 'EP')
ep

Unnamed: 0,Regression Parameters
const,-0.0712
EP,0.0032
r-squared,0.0086


In [364]:
ep_dp_10y = reg_params(signals, 'SPY', ['EP','DP','US10Y'])
ep_dp_10y

Unnamed: 0,Regression Parameters
const,-0.1792
EP,0.0027
DP,0.008
US10Y,-0.001
r-squared,0.0163


#### 3.2 For each of the three regressions, let’s try to utilize the resulting forecast in a trading strategy.
- Build the forecasted SPY returns: $\hat{r}^{SPY}_{t+1}$ . Note that this denotes the forecast made using $X_t$ to forecast the $(t + 1)$ return
- Set the scale of the investment in SPY equal to 100 times the forecasted value:   <br> $ \omega_t = 100 \hat{r}^{SPY}_{t+1} $
- We are not taking this scaling too seriously. We just want the strategy to go bigger in periods where the forecast is high and to withdraw in periods where the forecast is low, or even negative.
- Calculate the return on this strategy: <br> $ r^{X}_{t+1} = \omega_t r^{SPY}_{t+1} $

In [365]:
w_dp = 100 * (dp.loc['const'][0]/12 + dp.loc['DP'][0] * signals['DP'])

r_dp = (w_dp * signals['SPY']).dropna()

w_ep = 100 * (ep.loc['const'][0]/12 + ep.loc['EP'][0] * signals['EP'])

r_ep = (w_ep * signals['SPY']).dropna()

w_3fac = 100 * (ep_dp_10y.loc['const'][0]/12 + ep_dp_10y.loc['EP'][0] * signals['EP']\
                                             + ep_dp_10y.loc['DP'][0] * signals['DP']\
                                             + ep_dp_10y.loc['US10Y'][0] * signals['US10Y'])
r_3fac = (w_3fac * signals['SPY']).dropna()


In [403]:
def lagged_reg(df, y_col, X_col, weight=100, lag=1, intercept = True, annual_fac=12):
    y = df[y_col]
    if intercept == True:
        X = sm.add_constant(df[X_col].shift(lag))
    else:
        X = df[X_col].shift(lag)
    
    model = sm.OLS(y, X, missing = 'drop').fit()
    reg_df = model.params.to_frame('Regression Parameters')
    reg_df.loc['r-squared'] = model.rsquared
    
    if intercept == True:
        reg_df.loc['const'] *= annual_fac
        final = reg_df.loc['const'][0]/12
    else:
        final = 0
    
    reg_df = reg_df.drop('const')
    reg_df = reg_df.drop('r-squared')
    
    for i in reg_df.index:
        final += reg_df.loc[i][0] * df[i]
    
    final = final * weight
    
    final_series = (final * df[y_col]).dropna()
    
    return final

In [404]:
lagged_reg(signals_copy, 'SPY', ['EP','DP','US10Y'])

1993-02-28   -0.0151
1993-03-31   -0.0317
1993-04-30    0.0362
1993-05-31   -0.0381
1993-06-30   -0.0052
               ...  
2022-06-30    0.1165
2022-07-31   -0.1301
2022-08-31    0.0576
2022-09-30    0.1306
2022-10-31   -0.1148
Name: SPY, Length: 357, dtype: float64

#### You should now have the trading strategy returns, $r^x$ for each of the forecasts. For each strategy, estimate
- mean, volatility, Sharpe
- max-drawdown
- market alpha
- market beta
- market Information ratio

In [368]:
summary_stats_bm(r_dp, signals['SPY'])

Unnamed: 0,Mean,Vol,Sharpe (Mean/Vol),alpha,beta,Max Drawdown
Summary Stats,0.1095,0.149,0.7348,0.0207,0.8611,-0.653


In [369]:
summary_stats_bm(r_ep, signals['SPY'])

Unnamed: 0,Mean,Vol,Sharpe (Mean/Vol),alpha,beta,Max Drawdown
Summary Stats,0.1078,0.1286,0.8383,0.0322,0.7327,-0.3823


In [370]:
summary_stats_bm(r_3fac, signals['SPY'])

Unnamed: 0,Mean,Vol,Sharpe (Mean/Vol),alpha,beta,Max Drawdown
Summary Stats,0.125,0.1456,0.8588,0.0451,0.775,-0.5221


#### 3.3 GMO believes a risk premium is compensation for a security’s tendency to lose money at “bad times”. Let’s consider risk characteristics.

3.3(a) For both strategies, the market, and GMO, calculate the monthly VaR for π = .05. Just use the quantile of the historic data for this VaR calculation.

In [371]:
var = pd.DataFrame(data = [r_dp.quantile(0.05), r_ep.quantile(0.05), r_3fac.quantile(0.05), signals['SPY'].quantile(.05), gmo['GMWAX'].quantile(0.05)], index = ['DP-strat', 'EP-strat', '3-factor-strat', 'SPY', 'GMWAX'], columns=['VaR-5th'])
var

Unnamed: 0,VaR-5th
DP-strat,-0.0523
EP-strat,-0.0541
3-factor-strat,-0.0642
SPY,-0.0739
GMWAX,-0.0473


3.3 (b) The GMO case mentions that stocks under-performed short-term bonds from 2000-2011. Does the dynamic portfolio above under-perform the risk-free rate over this time?

Ans:- No, the dynamic portfolio does not under-perform the risk-free rate over this time. The vols for DP are higher than the risk-free rate.

In [372]:
mean_vol_sharpe(r_dp.to_frame('DP-strat').loc['2000':'2011'])

Unnamed: 0,mean,volatility,sharpe_ratio
DP-strat,0.0393,0.1842,0.2135


In [373]:
mean_vol_sharpe(r_ep.to_frame('EP-strat').loc['2000':'2011'])

Unnamed: 0,mean,volatility,sharpe_ratio
EP-strat,0.0373,0.1339,0.2784


In [374]:
mean_vol_sharpe(r_3fac.to_frame('3-factor-strat').loc['2000':'2011'])

Unnamed: 0,mean,volatility,sharpe_ratio
3-factor-strat,0.0608,0.1574,0.3863


In [375]:
mean_vol_sharpe(rf.loc['2000':'2011'])

Unnamed: 0,mean,volatility,sharpe_ratio
US3M,0.0231,0.0058,3.9866


3.3 (c) Based on the regression estimates, in how many periods do we estimate a negative risk premium?

In [376]:
compare = r_dp.to_frame('DP-strat')
compare['EP-strat'] = r_ep
compare['3-factor-strat'] = r_3fac
compare['rf'] = rf['US3M']

compare_premium = pd.DataFrame(data = None, index = ["% of periods underperforming risk-free"])
for col in compare.columns[:3]:
    compare_premium[col] = len(compare[compare[col]<compare['rf']])/len(compare) *100

compare_premium

Unnamed: 0,DP-strat,EP-strat,3-factor-strat
% of periods underperforming risk-free,37.3596,37.3596,37.0787


#### 3.3 (d) Do you believe the dynamic strategy takes on extra risk??

No, we do not believe the dynamic strategy takes on extra risk. The strategy has lower VaR than SPY.

## 4 Out-of-Sample Forecasting
This section utilizes data in the file, gmo analysis data.xlsx.

Reconsider the problem above, of estimating (1) for $x$. The reported $\mathbb{R}^2$ was the in-sample $\mathbb{R}^2$ –it
examined how well the forecasts fit in the sample from which the parameters were estimated. <br>
**In particular, focus on the case of using both dividend-price and earnings-price as signals.** <br>
Let’s consider the out-of-sample r-squared. To do so, we need to do the following:

- Start at $t=60$.
- Estimate (1) only using data through time $t$.
- Use the estimated parameters of (1), along with $x_{t+1}$ to calculate the out-of-sample forecast for the following period, $t + 1$. <br><br>$\hat{r}^{SPY}_{t+1} = \hat{\alpha}^{SPY,x}_{t} -(\beta^{SPY,x})'x_{t} $<br>
- Calculate the $t + 1$ forecast error, <br><br> $ e^{x}_{t+1} = r^{SPY}_{t+1} + \hat{r}^{SPY}_{t+1} $
- Move to $t = 61$, and loop through the rest of the sample.
You now have the time-series of out-of-sample prediction errors, $e^x$.
Calculate the time-series of out-of-sample prediction errors $e^0$, which are based on the null forecast: <br><br> $ \hat{r}^{SPY}_{t+1} = \frac{1}{t} \sum_{i=1}^{t} r^{SPY}_{i} $ <br><br> $ e^{0}_{t+1} = r^{SPY}_{t+1} - \bar{r}^{SPY}_{t+1} $

#### 4.1 Report the out-of-sample $ \mathbb{R}^2 $ : <br> $ \mathbb{R}^2_{OOS} \equiv 1 - \frac{\sum_{i=61}^{T} (e^{x}_{i})^2}{\sum_{i=61}^{T} (e^{0}_{i})^2} $ <br> Note that unlike an in-sample r-squared, the out-of-sample r-squared can be anywhere between $(−∞, 1]$. 

In [377]:
ep_oos_r2 = OOS_r2(signals, ['EP'], 60)

print('EP OOS R2: ', round(ep_oos_r2,4))

EP OOS R2:  -0.007


In [378]:
dp_oos_r2 = OOS_r2(signals, ['DP'], 60)

print('DP OOS R2: ', round(dp_oos_r2,4))

DP OOS R2:  -0.0024


#### 4.1 Did this forecasting strategy produce a positive OOS r-squared?

No, the $ \mathbb{R}^2_{OOS}$ is negative.

#### 4.2. Re-do problem 3.2 using this OOS forecast. How much better/worse is the OOS strategy compared to the in-sample version of 3.2?

In [379]:
oos_ep = OOS_strat(signals, ['EP'], 60, 100)
oos_ep.head()

Unnamed: 0,Strat Returns
1998-02-28,0.0512
1998-03-31,0.0456
1998-04-30,0.0143
1998-05-31,-0.0225
1998-06-30,0.0351


In [380]:
oos_dp = OOS_strat(signals, ['DP'], 60, 100)
oos_dp.head()

Unnamed: 0,Strat Returns
1998-02-28,0.1643
1998-03-31,0.1375
1998-04-30,0.0395
1998-05-31,-0.0619
1998-06-30,0.1092


In [381]:
summary_stats_bm(oos_ep['Strat Returns'], signals['SPY'])

Unnamed: 0,Mean,Vol,Sharpe (Mean/Vol),alpha,beta,Max Drawdown
Summary Stats,0.0819,0.1654,0.4953,0.0353,0.5435,-0.5837


In [382]:
summary_stats_bm(oos_dp['Strat Returns'], signals['SPY'])

Unnamed: 0,Mean,Vol,Sharpe (Mean/Vol),alpha,beta,Max Drawdown
Summary Stats,0.0798,0.1763,0.4525,-0.0069,1.0109,-0.5519


The strategy is significantly worse than the in-sample version of 3.2, having lower mean and sharpe ratio.

#### 4.3. Re-do problem 3.3 using this OOS forecast. Is the point-in-time version of the strategy riskier?

#### 4.3 (a)

In [383]:
var_oos = pd.DataFrame([oos_ep['Strat Returns'].quantile(0.05), signals['SPY'].quantile(0.05), gmo['GMWAX'].quantile(0.05)], index = ['EP-strat', 'SPY', 'GMO'], columns=['VaR-5th'])
var_oos

Unnamed: 0,VaR-5th
EP-strat,-0.071
SPY,-0.0739
GMO,-0.0473


In [384]:
var_oos_dp = pd.DataFrame([oos_dp['Strat Returns'].quantile(0.05), signals['SPY'].quantile(0.05), gmo['GMWAX'].quantile(0.05)], index = ['DP-strat', 'SPY', 'GMO'], columns=['VaR-5th'])
var_oos_dp

Unnamed: 0,VaR-5th
DP-strat,-0.0726
SPY,-0.0739
GMO,-0.0473


#### 4.3 (b)

In [385]:
summary_stats_bm(oos_ep.loc['2000':'2011']['Strat Returns'], gmo[['SPY']])

Unnamed: 0,Mean,Vol,Sharpe (Mean/Vol),alpha,beta,Max Drawdown
Summary Stats,0.0388,0.1959,0.1979,0.0333,0.2994,-0.5837


In [386]:
summary_stats_bm(oos_dp.loc['2000':'2011']['Strat Returns'], gmo[['SPY']])

Unnamed: 0,Mean,Vol,Sharpe (Mean/Vol),alpha,beta,Max Drawdown
Summary Stats,-0.0109,0.1632,-0.0667,-0.0282,0.9551,-0.5519


In [387]:
mean_vol_sharpe(rf.loc['2000':'2011'])

Unnamed: 0,mean,volatility,sharpe_ratio
US3M,0.0231,0.0058,3.9866


The dynamic strategy has around 3.8% mean compared to 2.3% of the risk free rate, which does mean that strategy is slightly better. The DP has a negative mean, which means that it is underperforming the risk free rate.

#### 4.3(c)

In [388]:
r_df_OOS = oos_ep.rename(columns={"Strat Returns": "EP Strat"})
r_df_OOS['rf'] = rf['US3M']
r_df_OOS['DP-strat'] = oos_dp['Strat Returns']

df_riskprem2 = pd.DataFrame(data=None, index=[r'% of periods underperforming $r^{f}$'])
for col in r_df_OOS.columns[:3]:
    df_riskprem2[col] = len(r_df_OOS[r_df_OOS[col] < r_df_OOS['rf']])/len(r_df_OOS) * 100
    
df_riskprem2

Unnamed: 0,EP Strat,rf,DP-strat
% of periods underperforming $r^{f}$,38.3838,0.0,39.0572


#### 4.3(d)

THe Dynamic strategy does take on extra risk as it has worse risk metrics than SPY.