In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm

from sklearn import linear_model
import scipy.stats as stats
from statsmodels.regression.rolling import RollingOLS
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")
pd.set_option("display.precision", 4)
sns.set(rc={'figure.figsize':(15, 10)})

# Part 2 Analyzing GMO
# Examine GMO's performance. Use the risk-free rate to convert the total returns to excess returns
## 1. Calculate the mean, volatility, and Sharpe ratio for GMWAX. Do this for three samples:


In [126]:
signals = pd.read_excel('gmo_analysis_data.xlsx', sheet_name=1).set_index('Date').dropna()
rf = pd.read_excel('gmo_analysis_data.xlsx', sheet_name=3).set_index('Date')
GMO = pd.read_excel('gmo_analysis_data.xlsx', sheet_name=2).set_index('Date')
GMO_ex = GMO.dropna().subtract(rf['US3M'], axis=0).dropna()


### from inception through 2011

In [3]:
def summary_stats(df, annual_fac = 12):
    stats_df = pd.DataFrame(data = None)
    stats_df['Mean'] = df.mean()*annual_fac
    stats_df['Vol'] = df.std()*np.sqrt(annual_fac)
    stats_df['Sharpe'] = df.mean()/df.std()*np.sqrt(annual_fac)
    
    return stats_df

In [4]:
summary_stats(GMO_ex.loc[:'2011',['GMWAX']] )

Unnamed: 0,Mean,Vol,Sharpe
GMWAX,0.0158,0.125,0.1266


### 2012-present

In [5]:
summary_stats(GMO_ex.loc['2012':,['GMWAX']] )

Unnamed: 0,Mean,Vol,Sharpe
GMWAX,0.0366,0.092,0.3982


### inception - present

In [6]:
summary_stats(GMO_ex[['GMWAX']])


Unnamed: 0,Mean,Vol,Sharpe
GMWAX,0.0245,0.1123,0.2181


## Has the mean, vol, and Sharpe changed much since the case?
Since 2012 GMO perfomanced better that it did at first stage in terms of mean, vol and sharpe ratio.

## 2. GMO believes a risk premium is compensation for a security's tendency to lose money at bad times". For all three samples, analyze extreme scenarios by looking at:
### Min return
### VaR-5th
### Maximum drawdown


In [7]:
def tail_risk(df,df_total):
    tr_df = pd.DataFrame(data = None)
    tr_df['Min return'] = df.min()
    tr_df['VaR-5th'] = df.quantile(.05)
    cum_ret = (1 + df_total).cumprod()
    rolling_max = cum_ret.cummax()
    drawdown = (cum_ret - rolling_max) / rolling_max
    tr_df['Max Drawdown'] = drawdown.min()
    
    return tr_df

In [8]:
tail_2011 = tail_risk(GMO_ex.loc[:'2011'],GMO.loc[:'2011'])
tail_2011

Unnamed: 0,Min return,VaR-5th,Max Drawdown
SPY,-0.1656,-0.0802,-0.508
GMWAX,-0.1492,-0.0598,-0.3552


In [9]:
tail_2012 = tail_risk(GMO_ex.loc['2012':],GMO.loc['2012':])
tail_2012

Unnamed: 0,Min return,VaR-5th,Max Drawdown
SPY,-0.1247,-0.0687,-0.2393
GMWAX,-0.1187,-0.0397,-0.2168


In [10]:
tail_total = tail_risk(GMO_ex,GMO)
tail_total

Unnamed: 0,Min return,VaR-5th,Max Drawdown
SPY,-0.1656,-0.08,-0.508
GMWAX,-0.1492,-0.0483,-0.3552


## a) Does GMWAX have high or low tail-risk as seen by these stats?
GWMAX has lower tail_risk than SPY in terms of all three statitics.

## b) Does that vary much across the two subsamples?
Yes,in subsample from 2012 to recent, the max drawdown and min return improved a lot.

## 3. For all three samples, regress excess returns of GMWAX on excess returns of SPY.
## a) Report the estimated alpha, beta, and r-squared.

In [23]:
def reg(df, y_col, X_col, annual_fac=12):
    y = df[y_col]
    X = sm.add_constant(df[X_col])
    model = sm.OLS(y, X, missing = 'drop').fit()
    reg_df = model.params.to_frame('Regression Parameters')
    reg_df.loc[r'$R^{2}$'] = model.rsquared
    reg_df.loc['const'] *= annual_fac
    
    #rename index
    reg_df.rename(index = {'const':'Alpha','SPY':'SPY_beta','DP':'DP_beta','EP':'EP_beta','US10Y':'US10Y_beta'},inplace = True)
    return reg_df

In [12]:
reg(GMO_ex.loc[:'2011'],'GMWAX','SPY')
# From imception to 2021

Unnamed: 0,Regression Parameters
Alpha,-0.0058
SPY_beta,0.5396
$R^{2}$,0.5071


In [13]:
reg(GMO_ex.loc['2012':],'GMWAX','SPY')
# From 2012 to recent

Unnamed: 0,Regression Parameters
Alpha,-0.0345
SPY_beta,0.5622
$R^{2}$,0.7645


In [14]:
reg(GMO_ex,'GMWAX','SPY')
# Total

Unnamed: 0,Regression Parameters
Alpha,-0.017
SPY_beta,0.5456
$R^{2}$,0.5777


## b) Is GMWAX a low-beta strategy? Has that changed since the case?
We can see that GMWAX has overall market beta larger than 0.5, for me this is not a very high nor very low beta, but comparing with others it can be called a low-beta strategy. 
GMWAX's market beta has mained consistency since the case.
## c) Does GMWAX provide alpha? Has that changed across the subsamples?
No since all subsample alphas are negative.

# Part 3 Forecast Regressions
## 1. Do the lagged regression, report R-squared.
### a) X as a single regressor, the dividend-price ratio.

In [19]:
signals_copy = signals.shift()
signals_copy['SPY'] = GMO['SPY']
signals_copy

Unnamed: 0_level_0,DP,EP,US10Y,SPY
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1993-02-28,,,,0.0107
1993-03-31,2.82,4.44,6.03,0.0224
1993-04-30,2.77,4.41,6.03,-0.0256
1993-05-31,2.82,4.44,6.05,0.0270
1993-06-30,2.81,4.38,6.16,0.0037
...,...,...,...,...
2022-06-30,1.57,4.79,2.85,-0.0825
2022-07-31,1.64,5.23,2.98,0.0921
2022-08-31,1.65,4.91,2.67,-0.0408
2022-09-30,1.56,5.00,3.15,-0.0924


In [35]:
DP = reg(signals_copy, 'SPY', 'DP')
DP


Unnamed: 0,Regression Parameters
Alpha,-0.1129
DP_beta,0.0094
$R^{2}$,0.0094


### b) X as a single regressor, the earnings-price ratio.

In [30]:
EP = reg(signals_copy, 'SPY', 'EP')
EP

Unnamed: 0,Regression Parameters
Alpha,-0.0712
EP_beta,0.0032
$R^{2}$,0.0086


### c) X as three regressors, the dividend-price ratio, the earnings-price ratio, and the 10-year yield.

In [31]:
EP_DP_10Y = reg(signals_copy, 'SPY', ['DP','EP','US10Y'])
EP_DP_10Y

Unnamed: 0,Regression Parameters
Alpha,-0.1792
DP_beta,0.008
EP_beta,0.0027
US10Y_beta,-0.001
$R^{2}$,0.0163


## 2. For each of the three regressions, utilize the resulting forecast in a trading strategy


In [40]:
w_DP = 100 * (DP.loc['Alpha'][0]/12 + DP.loc['DP_beta'][0] * signals_copy['DP'])

r_DP = (w_DP * signals_copy['SPY']).dropna()

w_EP = 100 * (EP.loc['Alpha'][0]/12 + EP.loc['EP_beta'][0] * signals_copy['EP'])

r_EP = (w_EP * signals_copy['SPY']).dropna()

w_3fac = 100 * (EP_DP_10Y.loc['Alpha'][0]/12 + EP_DP_10Y.loc['EP_beta'][0] * signals_copy['EP']\
                                             + EP_DP_10Y.loc['DP_beta'][0] * signals_copy['DP']\
                                             + EP_DP_10Y.loc['US10Y_beta'][0] * signals_copy['US10Y'])

r_3fac = (w_3fac * signals_copy['SPY']).dropna()

In [69]:
# estimate each strategy's performace
def summary_stats_bm(series, bm, series_total,annual_fac=12):
    ss_df = pd.DataFrame(data = None, index = ['Summary Stats'])
    ss_df['Mean'] = series.mean() * annual_fac
    ss_df['Vol'] = series.std() * np.sqrt(annual_fac)
    ss_df['Sharpe (Mean/Vol)'] = ss_df['Mean'] / ss_df['Vol']
    
    y = series
    X = sm.add_constant(bm.loc[series.index])
    reg = sm.OLS(y,X).fit()
    ss_df[r'$\alpha$'] = reg.params[0] * annual_fac
    ss_df[r'$\beta^{SPY}$'] = reg.params[1]
    
    cum_ret = (1 + series_total).cumprod()
    rolling_max = cum_ret.cummax()
    drawdown = (cum_ret - rolling_max) / rolling_max
    ss_df['Max Drawdown'] = drawdown.min()
    
    ss_df['Information ratio'] = (reg.params[0] / reg.resid.std()) * np.sqrt(annual_fac)
    
    return round(ss_df, 4)

In [70]:
r_DP_total = r_DP.add(rf['US3M'])
summary_stats_bm(r_DP, signals_copy[['SPY']],r_DP_total)


Unnamed: 0,Mean,Vol,Sharpe (Mean/Vol),$\alpha$,$\beta^{SPY}$,Max Drawdown,Information ratio
Summary Stats,0.1095,0.149,0.7348,0.0207,0.8611,-0.6463,0.2759


In [71]:
r_EP_total = r_EP.add(rf['US3M'])
summary_stats_bm(r_EP, signals_copy[['SPY']],r_EP_total)


Unnamed: 0,Mean,Vol,Sharpe (Mean/Vol),$\alpha$,$\beta^{SPY}$,Max Drawdown,Information ratio
Summary Stats,0.1078,0.1286,0.8383,0.0322,0.7327,-0.3693,0.4789


In [72]:
r_3fac_total = r_3fac.add(rf['US3M'])
summary_stats_bm(r_3fac, signals_copy[['SPY']],r_3fac_total)


Unnamed: 0,Mean,Vol,Sharpe (Mean/Vol),$\alpha$,$\beta^{SPY}$,Max Drawdown,Information ratio
Summary Stats,0.125,0.1456,0.8588,0.0451,0.775,-0.5129,0.5118


## 3. Risk characteristics.
### (a) For both strategies, the market, and GMO, calculate the monthly VaR for pi = 0.05. Just use the quantile of the historic data for this VaR calculation.

In [73]:
# VAR
VaR = pd.DataFrame([r_DP.quantile(.05), r_EP.quantile(.05), r_3fac.quantile(.05), 
                    signals_copy['SPY'].quantile(.05), 
                    GMO['GMWAX'].quantile(.05)],
                   index = ['DP Strat','EP Strat','3-factor Strat','SPY','GMO'], 
                   columns = ['5% VaR'])

VaR

Unnamed: 0,5% VaR
DP Strat,-0.0523
EP Strat,-0.0541
3-factor Strat,-0.0642
SPY,-0.0739
GMO,-0.0473


### (b) The GMO case mentions that stocks under-performed short-term bonds from 2000-2011. Does the dynamic portfolio above under-perform the risk-free rate over this time?
From the stats listed below, all three dynamic portfolios out-performed the risk-free rate over this time.

In [74]:
summary_stats(r_DP.to_frame('DP Strat').loc['2000':'2011'])

Unnamed: 0,Mean,Vol,Sharpe
DP Strat,0.0393,0.1842,0.2135


In [75]:
summary_stats(r_EP.to_frame('EP Strat').loc['2000':'2011'])

Unnamed: 0,Mean,Vol,Sharpe
EP Strat,0.0373,0.1339,0.2784


In [77]:
summary_stats(r_3fac.to_frame('r_3_fac Strat').loc['2000':'2011'])

Unnamed: 0,Mean,Vol,Sharpe
r_3_fac Strat,0.0608,0.1574,0.3863


In [81]:
summary_stats(rf["US3M"].to_frame('RF Strat').loc['2000':'2011'])


Unnamed: 0,Mean,Vol,Sharpe
RF Strat,0.0231,0.0058,3.9866


### (c) Based on the regression estimates, in how many periods do we estimate a negative risk premium?


In [100]:
r_df = r_3fac.to_frame('3-factor Strat')
r_df['DP Strat'] = r_DP
r_df['EP Strat'] = r_EP
r_df['rf'] = rf['US3M']

df_riskprem = pd.DataFrame(data=None, index=[r'% of periods underperforming $r^{f}$'])
for col in r_df.columns[:3]:
    df_riskprem[col] = len(r_df[r_df[col] < r_df['rf']])/len(r_df) * 100
    
df_riskprem



Unnamed: 0,3-factor Strat,DP Strat,EP Strat
% of periods underperforming $r^{f}$,37.0787,37.3596,37.3596


### (d) Do you believe the dynamic strategy takes on extra risk?
Not really, all these statistics shows that overall dynamic strategies does not take extra risks comparing with SPY.

# Part 4 Out-of-Sample Forecasting
## 1. Report the out-of-sample R2


In [89]:
def OOS_r2(df, factors, start):
    y = df['SPY']
    X = sm.add_constant(df[factors])

    forecast_err, null_err = [], []

    for i,j in enumerate(df.index):
        if i >= start:
            currX = X.iloc[:i]
            currY = y.iloc[:i]
            reg = sm.OLS(currY, currX, missing = 'drop').fit()
            null_forecast = currY.mean()
            reg_predict = reg.predict(X.iloc[[i]])
            actual = y.iloc[[i]]
            forecast_err.append(reg_predict - actual)
            null_err.append(null_forecast - actual)
            
    RSS = (np.array(forecast_err)**2).sum()
    TSS = (np.array(null_err)**2).sum()
    
    return 1 - RSS/TSS

In [94]:
EP_OOS_r2 = OOS_r2(signals_copy, ['EP'], 60)

print('EP OOS R-squared: ' + str(round(EP_OOS_r2, 4)))


EP OOS R-squared: -0.007


In [95]:
DP_OOS_r2 = OOS_r2(signals_copy, ['DP'], 60)

print('DP OOS R-squared: ' + str(round(DP_OOS_r2, 4)))

DP OOS R-squared: -0.0024


### Did this forecasting strategy produce a positive OOS r-squared?
No, both strategies produced negative OOS r-squared.

## 2. Re-do problem 3.2 using this OOS forecast. 

In [97]:
def OOS_strat(df, factors, start, weight):
    returns = []
    y = df['SPY']
    X = sm.add_constant(df[factors])

    for i,j in enumerate(df.index):
        if i >= start:
            currX = X.iloc[:i]
            currY = y.iloc[:i]
            reg = sm.OLS(currY, currX, missing = 'drop').fit()
            pred = reg.predict(X.iloc[[i]])
            w = pred * weight
            returns.append((df.iloc[i]['SPY'] * w)[0])

    df_strat = pd.DataFrame(data = returns, index = df.iloc[-(len(returns)):].index, columns = ['Strat Returns'])
    return df_strat

In [110]:
OOS_EP = OOS_strat(signals_copy, ['EP'], 60, 100)

OOS_EP_total = OOS_EP.add(rf['US3M'])
summary_stats_bm(OOS_EP['Strat Returns'], signals_copy['SPY'],OOS_EP_total)



Unnamed: 0,Mean,Vol,Sharpe (Mean/Vol),$\alpha$,$\beta^{SPY}$,Max Drawdown,Information ratio
Summary Stats,0.0819,0.1654,0.4953,0.0353,0.5435,,0.249


In [142]:
OOS_DP = OOS_strat(signals_copy, ['DP'], 60, 100)

OOS_DP_total = OOS_DP.add(rf['US3M'], axis=0).dropna()
summary_stats_bm(OOS_DP['Strat Returns'], signals_copy['SPY'],OOS_DP_total)


Unnamed: 0,Mean,Vol,Sharpe (Mean/Vol),$\alpha$,$\beta^{SPY}$,Max Drawdown,Information ratio
Summary Stats,0.0798,0.1763,0.4525,-0.0069,1.0109,,-0.088


### How much better/worse is the OOS strategy compared to the in-sample version of 3.2?


## 3. Re-do problem 3.3 using this OOS forecast. Is the point-in-time version of the strategy riskier?

In [153]:
#a) 
VaR_OOS = pd.DataFrame([OOS_EP['Strat Returns'].quantile(.05),OOS_DP['Strat Returns'].quantile(.05),
                    signals_copy['SPY'].quantile(.05), 
                    GMO['GMWAX'].quantile(.05)],
                   index = ['EP Strat','DP Strat','SPY','GMO'], 
                   columns = ['5% VaR'])

VaR_OOS

Unnamed: 0,5% VaR
EP Strat,-0.071
DP Strat,-0.0726
SPY,-0.0739
GMO,-0.0473


In [154]:
#b)
summary_stats_bm(OOS_EP.loc['2000':'2011']['Strat Returns'], GMO[['SPY']],OOS_EP_total)

Unnamed: 0,Mean,Vol,Sharpe (Mean/Vol),$\alpha$,$\beta^{SPY}$,Max Drawdown,Information ratio
Summary Stats,0.0388,0.1959,0.1979,0.0333,0.2994,,0.1757


In [155]:
summary_stats_bm(OOS_DP.loc['2000':'2011']['Strat Returns'], GMO[['SPY']],OOS_DP_total)

Unnamed: 0,Mean,Vol,Sharpe (Mean/Vol),$\alpha$,$\beta^{SPY}$,Max Drawdown,Information ratio
Summary Stats,-0.0109,0.1632,-0.0667,-0.0282,0.9551,,-0.5709


In [156]:
summary_stats(rf.loc['2000':'2011'])

Unnamed: 0,Mean,Vol,Sharpe
US3M,0.0231,0.0058,3.9866


We can observe that these two dynamic strategies hardly outperformed the risk-free rate in terms of premium.

In [162]:
#c)
r_df_OOS_EP = OOS_EP.rename(columns={"Strat Returns": "EP Strat"})
r_df_OOS_EP['rf'] = rf['US3M']

riskprem2_EP = pd.DataFrame(data=None, index=[r'% of periods underperforming $r^{f}$'])
for col in r_df_OOS_EP.columns[:3]:
    riskprem2_EP[col] = len(r_df_OOS_EP[r_df_OOS[col] < r_df_OOS_EP['rf']])/len(r_df_OOS_EP) * 100
    
riskprem2_EP

Unnamed: 0,EP Strat,rf
% of periods underperforming $r^{f}$,38.3838,0.0


In [163]:
r_df_OOS_DP = OOS_DP.rename(columns={"Strat Returns": "DP Strat"})
r_df_OOS_DP['rf'] = rf['US3M']

riskprem2_DP = pd.DataFrame(data=None, index=[r'% of periods underperforming $r^{f}$'])
for col in r_df_OOS_DP.columns[:3]:
    riskprem2_DP[col] = len(r_df_OOS_DP[r_df_OOS_DP[col] < r_df_OOS_DP['rf']])/len(r_df_OOS_DP) * 100
    
riskprem2_DP

Unnamed: 0,DP Strat,rf
% of periods underperforming $r^{f}$,39.0572,0.0


#d)
Yes, it seems riker than SPY. 