# GMO Forecasting

## Group C : 18

Members: Aryaa Gunavante, Catherine Chen, Hsiao-Chi Chen, Shreya Enaganti.

In [None]:
import pandas as pd
import numpy as np
import statsmodels.api as sm

## 2 Analyzing GMO
This section utilizes data in the file gmo_data.xlsx. Convert total returns to excess returns using the risk‑free rate.

In [2]:
# 2

file_path = "gmo_analysis_data.xlsx"
tr = pd.read_excel(file_path, sheet_name="total returns")
rf = pd.read_excel(file_path, sheet_name="risk-free rate")
signal = pd.read_excel(file_path, sheet_name="signals")
tr["date"] = pd.to_datetime(tr["date"])
rf["date"] = pd.to_datetime(rf["date"])
signal["date"] = pd.to_datetime(signal["date"])
tr.set_index("date", inplace=True)
rf.set_index("date", inplace=True)
signal.set_index("date", inplace=True) 

er = tr - (rf.values / 12)
er.head()

Unnamed: 0_level_0,SPY,GMWAX,GMGEX
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1996-12-31,-0.027601,-0.026403,-0.017309
1997-01-31,0.057497,0.010446,0.030159
1997-02-28,0.005215,0.017915,0.008383
1997-03-31,-0.050156,-0.019587,-0.020876
1997-04-30,0.060008,-0.011092,-0.004361


In [3]:
FREQ = 12

def performance_stats(ret):
    """Compute mean, vol, Sharpe (annualized)."""
    mean_ann = ret.mean() * FREQ
    vol_ann = ret.std() * np.sqrt(FREQ)
    sharpe = mean_ann / vol_ann if vol_ann != 0 else np.nan
    return mean_ann, vol_ann, sharpe

def max_drawdown(total_ret):
    wealth = (1 + total_ret).cumprod()
    peak = wealth.cummax()
    dd = (wealth - peak) / peak
    return dd.min()

def tail_risk(ret, total_ret):
    min_ret = ret.min()
    var_5 = ret.quantile(0.05)
    mdd = max_drawdown(total_ret)
    return min_ret, var_5, mdd

def regression_stats(y, x):
    X = sm.add_constant(x)
    model = sm.OLS(y, X).fit()
    alpha_daily = model.params["const"]
    beta = model.params[x.name]
    r2 = model.rsquared
    alpha_ann = alpha_daily * FREQ
    return alpha_ann, beta, r2

In [4]:
# 2.1

samples = {
    "inception_to_2011": er.loc[: "2011-12-31"],
    "2012_to_present":   er.loc["2012-01-01": ],
    "inception_to_present": er
}

print("===== 2.1 PERFORMANCE (GMWAX) =====")
for name, sub in samples.items():
    m, v, s = performance_stats(sub["GMWAX"])
    print(f"\n--- {name} ---")
    print(f"Mean (ann):   {m:.6f}")
    print(f"Vol (ann):    {v:.6f}")
    print(f"Sharpe:       {s:.6f}")

===== 2.1 PERFORMANCE (GMWAX) =====

--- inception_to_2011 ---
Mean (ann):   0.046422
Vol (ann):    0.110499
Sharpe:       0.420110

--- 2012_to_present ---
Mean (ann):   0.049157
Vol (ann):    0.092661
Sharpe:       0.530503

--- inception_to_present ---
Mean (ann):   0.047730
Vol (ann):    0.102209
Sharpe:       0.466986


<span style="color: blue;">

ANS:

Both the mean and the volatility improve in the second subsample, lead to a even higher Sharpe Ratio. The result is quite robust.

In [5]:
#2.2

print("\n===== 2.2 TAIL RISK (GMWAX) =====")
for name, sub in samples.items():
    min_ret, var5, mdd = tail_risk(
        sub["GMWAX"],            # excess return
        tr.loc[sub.index]["GMWAX"]    # total return
    )
    print(f"\n--- {name} ---")
    print(f"Min return:   {min_ret:.6f}")
    print(f"VaR-5%:       {var5:.6f}")
    print(f"Max drawdown: {mdd:.6f}")


===== 2.2 TAIL RISK (GMWAX) =====

--- inception_to_2011 ---
Min return:   -0.149150
VaR-5%:       -0.044003
Max drawdown: -0.293614

--- 2012_to_present ---
Min return:   -0.115018
VaR-5%:       -0.039814
Max drawdown: -0.216795

--- inception_to_present ---
Min return:   -0.149150
VaR-5%:       -0.041147
Max drawdown: -0.293614


<span style="color: blue;">

ANS:

(a) The tail-risk seems fine, as Var-5% of monthly data is around 4% through the sample, and the minimum return is 15% in all the sample, and maximum drawdown is 29.4% in all the sample.

(b) No, the second subsample seems quite similar to the first subsample in term of tail risk. It was actually improving.

In [6]:
# 2.3

print("\n===== 2.3 MARKET EXPOSURE (GMWAX) =====")
for name, sub in samples.items():
    alpha, beta, r2 = regression_stats(
        sub["GMWAX"],     # y
        sub["SPY"]  # x = SPY excess
    )
    print(f"\n--- {name} ---")
    print(f"Alpha (ann):  {alpha:.6f}")
    print(f"Beta:         {beta:.6f}")
    print(f"R²:           {r2:.6f}")



===== 2.3 MARKET EXPOSURE (GMWAX) =====

--- inception_to_2011 ---
Alpha (ann):  0.027000
Beta:         0.542128
R²:           0.648686

--- 2012_to_present ---
Alpha (ann):  -0.027362
Beta:         0.566914
R²:           0.730904

--- inception_to_present ---
Alpha (ann):  0.002152
Beta:         0.547452
R²:           0.675236


<span style="color: blue;">

ANS:

GMWAX has around 0.5 Beta, I would not consider it low-beta strategy, and this didn't change across subsamples.

GMWAX had a provide alpha in the first subsample, but not in the second subsample.

In [7]:
# 2.4

def analyze_one_fund(fund_name):
    print(f"\n========== RESULTS FOR {fund_name} ==========")

    # 2.1 Performance
    print("\n--- 2.1 PERFORMANCE ---")
    for name, sub in samples.items():
        m, v, s = performance_stats(sub[fund_name])
        print(f"\n{name}")
        print(f"Mean (ann):   {m:.6f}")
        print(f"Vol (ann):    {v:.6f}")
        print(f"Sharpe:       {s:.6f}")

    # 2.2 Tail Risk
    print("\n--- 2.2 TAIL RISK ---")
    for name, sub in samples.items():
        min_ret, var5, mdd = tail_risk(
            sub[fund_name],
            tr.loc[sub.index][fund_name]
        )
        print(f"\n{name}")
        print(f"Min return:   {min_ret:.6f}")
        print(f"VaR-5%:       {var5:.6f}")
        print(f"Max drawdown: {mdd:.6f}")

    # 2.3 Market Exposure Regression
    print("\n--- 2.3 REGRESSION ---")
    for name, sub in samples.items():
        alpha, beta, r2 = regression_stats(
            sub[fund_name],
            er.loc[sub.index]["SPY"]
        )
        print(f"\n{name}")
        print(f"Alpha (ann):  {alpha:.6f}")
        print(f"Beta:         {beta:.6f}")
        print(f"R²:           {r2:.6f}")

# Run comparison
analyze_one_fund("GMWAX")
analyze_one_fund("GMGEX")



--- 2.1 PERFORMANCE ---

inception_to_2011
Mean (ann):   0.046422
Vol (ann):    0.110499
Sharpe:       0.420110

2012_to_present
Mean (ann):   0.049157
Vol (ann):    0.092661
Sharpe:       0.530503

inception_to_present
Mean (ann):   0.047730
Vol (ann):    0.102209
Sharpe:       0.466986

--- 2.2 TAIL RISK ---

inception_to_2011
Min return:   -0.149150
VaR-5%:       -0.044003
Max drawdown: -0.293614

2012_to_present
Min return:   -0.115018
VaR-5%:       -0.039814
Max drawdown: -0.216795

inception_to_present
Min return:   -0.149150
VaR-5%:       -0.041147
Max drawdown: -0.293614

--- 2.3 REGRESSION ---

inception_to_2011
Alpha (ann):  0.027000
Beta:         0.542128
R²:           0.648686

2012_to_present
Alpha (ann):  -0.027362
Beta:         0.566914
R²:           0.730904

inception_to_present
Alpha (ann):  0.002152
Beta:         0.547452
R²:           0.675236


--- 2.1 PERFORMANCE ---

inception_to_2011
Mean (ann):   -0.003823
Vol (ann):    0.147253
Sharpe:       -0.025963

2012_

<span style="color: blue;">

ANS:

Overall, GMWAX demonstrates superior performance, lower volatility, and more stable market behavior across all periods. In contrast, GMGEX delivers significantly higher risk, deeper drawdowns, and weaker or negative alpha, especially after 2012. From a risk-adjusted perspective, GMWAX is clearly the stronger and more reliable fund.



In [8]:
# 3

import pandas as pd
import numpy as np

# Read the Excel file
total_returns = pd.read_excel('gmo_analysis_data.xlsx', sheet_name="total returns", index_col=0)
risk_free_rate = pd.read_excel('gmo_analysis_data.xlsx', sheet_name="risk-free rate", index_col=0) / 12

# Date to datetime
total_returns.index = pd.to_datetime(total_returns.index)
risk_free_rate.index = pd.to_datetime(risk_free_rate.index)


# Calculate excess returns by subtracting risk-free rate from each column
excess_returns = total_returns.sub(risk_free_rate['TBill 3M'], axis=0)

print(total_returns.head())
print(risk_free_rate.head())
print(excess_returns.head())

# Extract GMWAX excess returns
gmwax_excess = excess_returns['GMWAX'].dropna()

# Define time periods
inception_date = gmwax_excess.index.min()
end_2011 = pd.Timestamp('2011-12-31')
start_2012 = pd.Timestamp('2012-01-01')
present_date = gmwax_excess.index.max()

signals = pd.read_excel('gmo_analysis_data.xlsx', sheet_name="signals", index_col=0)
signals.head()

                 SPY     GMWAX     GMGEX
date                                    
1996-12-31 -0.023292 -0.022094 -0.013000
1997-01-31  0.061786  0.014735  0.034448
1997-02-28  0.009565  0.022265  0.012733
1997-03-31 -0.045721 -0.015152 -0.016441
1997-04-30  0.064368 -0.006731  0.000000
            TBill 3M
date                
1996-12-31  0.004309
1997-01-31  0.004289
1997-02-28  0.004350
1997-03-31  0.004435
1997-04-30  0.004361
                 SPY     GMWAX     GMGEX
date                                    
1996-12-31 -0.027601 -0.026403 -0.017309
1997-01-31  0.057497  0.010446  0.030159
1997-02-28  0.005215  0.017915  0.008383
1997-03-31 -0.050156 -0.019587 -0.020876
1997-04-30  0.060008 -0.011092 -0.004361


Unnamed: 0_level_0,SPX D/P,SPX E/P,T-Note 10YR
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1996-12-31,0.019651,0.051592,0.06418
1997-01-31,0.018455,0.048704,0.06494
1997-02-28,0.018502,0.048434,0.06552
1997-03-31,0.019427,0.055559,0.06903
1997-04-30,0.01843,0.052318,0.06718


In [9]:
# ============================================================================
# 3. Forecast Regressions - Lagged Regression
# r^SPY_t = α + β'X_{t-1} + ε_t
# ============================================================================

from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# Get SPY excess returns (dependent variable)
spy_excess = excess_returns['SPY'].dropna()

# Ensure signals index is datetime
signals.index = pd.to_datetime(signals.index)

# Define predictor columns
dp_col = 'SPX D/P'  # Dividend-price ratio
ep_col = 'SPX E/P'  # Earnings-price ratio
yield_col = 'T-Note 10YR'  # 10-year yield

print("="*80)
print("LAGGED REGRESSION: r^SPY_t = α + β'X_{t-1} + ε_t")
print("="*80)
print(f"\nDependent variable: SPY excess returns")
print(f"Predictors: {dp_col}, {ep_col}, {yield_col}")
print(f"\nData ranges:")
print(f"  SPY excess returns: {spy_excess.index.min().date()} to {spy_excess.index.max().date()}")
print(f"  Signals: {signals.index.min().date()} to {signals.index.max().date()}")

# Function to perform lagged regression
def lagged_regression(y, X, X_names):
    """
    Perform lagged regression: y_t = α + β'X_{t-1} + ε_t
    
    Parameters:
    y: Series of dependent variable (SPY returns at time t)
    X: DataFrame of predictors (at time t-1)
    X_names: List of predictor names
    
    Returns:
    Dictionary with alpha, beta, R², and other statistics
    """
    # Align indices - y at time t, X at time t-1
    # We need to shift X forward by 1 period so X[t-1] aligns with y[t]
    X_lagged = X[X_names].shift(1)  # Shift forward so X[t-1] aligns with y[t]
    
    # Align indices
    common_idx = y.index.intersection(X_lagged.index)
    y_aligned = y.loc[common_idx]
    X_aligned = X_lagged.loc[common_idx]
    
    # Drop rows with NaN (from the lag)
    valid_mask = ~(X_aligned.isna().any(axis=1) | y_aligned.isna())
    y_clean = y_aligned[valid_mask]
    X_clean = X_aligned[valid_mask]
    
    if len(y_clean) == 0:
        return None
    
    # Prepare data for regression
    X_values = X_clean.values
    y_values = y_clean.values
    
    # Perform OLS regression
    reg = LinearRegression()
    reg.fit(X_values, y_values)
    
    # Get predictions
    y_pred = reg.predict(X_values)
    
    # Calculate R-squared
    r_squared = r2_score(y_values, y_pred)
    
    # Calculate standard errors and t-statistics
    n = len(y_values)
    k = X_values.shape[1]  # number of predictors
    residuals = y_values - y_pred
    mse = np.sum(residuals ** 2) / (n - k - 1)  # Mean squared error
    
    # Standard errors
    X_with_const = np.column_stack([np.ones(n), X_values])
    try:
        cov_matrix = mse * np.linalg.inv(X_with_const.T @ X_with_const)
        se = np.sqrt(np.diag(cov_matrix))
        se_alpha = se[0]
        se_beta = se[1:]
    except:
        se_alpha = np.nan
        se_beta = np.full(k, np.nan)
    
    # t-statistics
    t_alpha = reg.intercept_ / se_alpha if se_alpha != 0 and not np.isnan(se_alpha) else np.nan
    t_beta = reg.coef_ / se_beta if not np.isnan(se_beta).any() else np.full(k, np.nan)
    
    # Create beta dictionary
    beta_dict = {name: coef for name, coef in zip(X_names, reg.coef_)}
    t_beta_dict = {name: t for name, t in zip(X_names, t_beta)}
    
    return {
        'Alpha': reg.intercept_,
        'Beta': beta_dict,
        'R²': r_squared,
        'Alpha (t-stat)': t_alpha,
        'Beta (t-stat)': t_beta_dict,
        'N observations': n
    }

# ============================================================================
# Regression 1: DP only
# ============================================================================
print("\n" + "="*80)
print("1. Regression with DP (dividend-price ratio) only")
print("="*80)
reg1 = lagged_regression(spy_excess, signals, [dp_col])
if reg1:
    print(f"Alpha: {reg1['Alpha']:.6f} (t-stat: {reg1['Alpha (t-stat)']:.4f})")
    print(f"Beta ({dp_col}): {reg1['Beta'][dp_col]:.6f} (t-stat: {reg1['Beta (t-stat)'][dp_col]:.4f})")
    print(f"R²: {reg1['R²']:.6f}")
    print(f"N observations: {reg1['N observations']}")
else:
    print("Regression failed - check data alignment")

# ============================================================================
# Regression 2: EP only
# ============================================================================
print("\n" + "="*80)
print("2. Regression with EP (earnings-price ratio) only")
print("="*80)
reg2 = lagged_regression(spy_excess, signals, [ep_col])
if reg2:
    print(f"Alpha: {reg2['Alpha']:.6f} (t-stat: {reg2['Alpha (t-stat)']:.4f})")
    print(f"Beta ({ep_col}): {reg2['Beta'][ep_col]:.6f} (t-stat: {reg2['Beta (t-stat)'][ep_col]:.4f})")
    print(f"R²: {reg2['R²']:.6f}")
    print(f"N observations: {reg2['N observations']}")
else:
    print("Regression failed - check data alignment")

# ============================================================================
# Regression 3: DP, EP, and 10-year yield
# ============================================================================
print("\n" + "="*80)
print("3. Regression with DP, EP, and 10-year yield")
print("="*80)
reg3 = lagged_regression(spy_excess, signals, [dp_col, ep_col, yield_col])
if reg3:
    print(f"Alpha: {reg3['Alpha']:.6f} (t-stat: {reg3['Alpha (t-stat)']:.4f})")
    print(f"Beta ({dp_col}): {reg3['Beta'][dp_col]:.6f} (t-stat: {reg3['Beta (t-stat)'][dp_col]:.4f})")
    print(f"Beta ({ep_col}): {reg3['Beta'][ep_col]:.6f} (t-stat: {reg3['Beta (t-stat)'][ep_col]:.4f})")
    print(f"Beta ({yield_col}): {reg3['Beta'][yield_col]:.6f} (t-stat: {reg3['Beta (t-stat)'][yield_col]:.4f})")
    print(f"R²: {reg3['R²']:.6f}")
    print(f"N observations: {reg3['N observations']}")
else:
    print("Regression failed - check data alignment")

# ============================================================================
# Summary Table
# ============================================================================
print("\n" + "="*80)
print("SUMMARY TABLE")
print("="*80)
summary_data = []
if reg1:
    summary_data.append({
        'Model': 'DP only',
        'Alpha': reg1['Alpha'],
        'Beta_DP': reg1['Beta'][dp_col],
        'R²': reg1['R²']
    })
if reg2:
    summary_data.append({
        'Model': 'EP only',
        'Alpha': reg2['Alpha'],
        'Beta_EP': reg2['Beta'][ep_col],
        'R²': reg2['R²']
    })
if reg3:
    summary_data.append({
        'Model': 'DP, EP, 10Y Yield',
        'Alpha': reg3['Alpha'],
        'Beta_DP': reg3['Beta'][dp_col],
        'Beta_EP': reg3['Beta'][ep_col],
        'Beta_10Y': reg3['Beta'][yield_col],
        'R²': reg3['R²']
    })

if summary_data:
    summary_df = pd.DataFrame(summary_data)
    print(summary_df.to_string(index=False))


LAGGED REGRESSION: r^SPY_t = α + β'X_{t-1} + ε_t

Dependent variable: SPY excess returns
Predictors: SPX D/P, SPX E/P, T-Note 10YR

Data ranges:
  SPY excess returns: 1996-12-31 to 2025-10-31
  Signals: 1996-12-31 to 2025-10-31

1. Regression with DP (dividend-price ratio) only
Alpha: -0.014011 (t-stat: -1.3023)
Beta (SPX D/P): 1.171755 (t-stat: 2.0059)
R²: 0.011561
N observations: 346

2. Regression with EP (earnings-price ratio) only
Alpha: -0.007194 (t-stat: -0.6979)
Beta (SPX E/P): 0.264020 (t-stat: 1.4189)
R²: 0.005819
N observations: 346

3. Regression with DP, EP, and 10-year yield
Alpha: -0.003545 (t-stat: -0.2210)
Beta (SPX D/P): 0.568923 (t-stat: 0.6141)
Beta (SPX E/P): 0.136755 (t-stat: 0.5219)
Beta (T-Note 10YR): -0.197803 (t-stat: -1.0037)
R²: 0.014541
N observations: 346

SUMMARY TABLE
            Model     Alpha  Beta_DP       R²  Beta_EP  Beta_10Y
          DP only -0.014011 1.171755 0.011561      NaN       NaN
          EP only -0.007194      NaN 0.005819 0.264020     

In [10]:
# ============================================================================
# 2. Trading Strategy from Forecasts
# ============================================================================

# We need to rebuild the regression models to get the coefficients for forecasting
# Then use X_t to forecast r^{SPY}_{t+1}

print("="*80)
print("TRADING STRATEGY FROM FORECASTS")
print("="*80)

# Function to build forecasts and trading strategy
def build_trading_strategy(reg_result, signals_df, spy_returns, X_names, strategy_name):
    """
    Build trading strategy from regression forecasts.
    
    Steps:
    1. Build forecasted SPY return: r̂^{SPY}_{t+1} = α + β'X_t
    2. Set portfolio weight: w_t = 100 * r̂^{SPY}_{t+1}
    3. Strategy return: r^x_{t+1} = w_t * r^{SPY}_{t+1}
    """
    if reg_result is None:
        return None
    
    # Get regression coefficients
    alpha = reg_result['Alpha']
    beta_dict = reg_result['Beta']
    
    # Align data - we need X_t to forecast r^{SPY}_{t+1}
    # X_t is at time t, and we forecast r^{SPY}_{t+1} (next period return)
    common_idx = signals_df.index.intersection(spy_returns.index)
    signals_aligned = signals_df.loc[common_idx, X_names]
    spy_aligned = spy_returns.loc[common_idx]
    
    # Shift spy_returns forward by 1 period so r^{SPY}_{t+1} aligns with X_t
    spy_next = spy_aligned.shift(-1)  # r^{SPY}_{t+1}
    
    # Align again after shift
    common_idx_final = signals_aligned.index.intersection(spy_next.index)
    signals_final = signals_aligned.loc[common_idx_final]
    spy_next_final = spy_next.loc[common_idx_final]
    
    # Drop NaN
    valid_mask = ~(signals_final.isna().any(axis=1) | spy_next_final.isna())
    signals_clean = signals_final[valid_mask]
    spy_next_clean = spy_next_final[valid_mask]
    
    if len(signals_clean) == 0:
        return None
    
    # Build forecasts: r̂^{SPY}_{t+1} = α + β'X_t
    X_values = signals_clean.values
    forecasts = alpha + np.sum([beta_dict[name] * signals_clean[name].values 
                                for name in X_names], axis=0)
    
    # Portfolio weights: w_t = 100 * r̂^{SPY}_{t+1}
    weights = 100 * forecasts
    
    # Strategy returns: r^x_{t+1} = w_t * r^{SPY}_{t+1}
    strategy_returns = weights * spy_next_clean.values
    
    # Create Series with proper index
    strategy_returns_series = pd.Series(strategy_returns, index=signals_clean.index)
    
    return {
        'strategy_name': strategy_name,
        'forecasts': pd.Series(forecasts, index=signals_clean.index),
        'weights': pd.Series(weights, index=signals_clean.index),
        'strategy_returns': strategy_returns_series,
        'spy_returns': spy_next_clean
    }

# Function to calculate strategy statistics
def calculate_strategy_stats(strategy_dict, spy_market):
    """Calculate statistics for trading strategy."""
    if strategy_dict is None:
        return None
    
    strategy_ret = strategy_dict['strategy_returns']
    spy_ret = strategy_dict['spy_returns']
    
    # Mean, volatility, Sharpe (annualized)
    mean_monthly = strategy_ret.mean()
    vol_monthly = strategy_ret.std()
    sharpe_monthly = mean_monthly / vol_monthly if vol_monthly > 0 else np.nan
    
    mean_annual = mean_monthly * 12
    vol_annual = vol_monthly * np.sqrt(12)
    sharpe_annual = mean_annual / vol_annual if vol_annual > 0 else np.nan
    
    # Maximum drawdown (on total returns, need to convert to cumulative)
    cumulative = (1 + strategy_ret).cumprod()
    running_max = cumulative.expanding().max()
    drawdown = (cumulative - running_max) / running_max
    max_drawdown = drawdown.min()
    
    # Market alpha and beta (regress strategy returns on market returns)
    # Align indices
    common_idx = strategy_ret.index.intersection(spy_market.index)
    strategy_aligned = strategy_ret.loc[common_idx]
    market_aligned = spy_market.loc[common_idx]
    
    if len(strategy_aligned) > 0:
        X = market_aligned.values.reshape(-1, 1)
        y = strategy_aligned.values
        
        reg = LinearRegression()
        reg.fit(X, y)
        y_pred = reg.predict(X)
        
        # Alpha and beta
        alpha = reg.intercept_
        beta = reg.coef_[0]
        
        # Information ratio = alpha / tracking error
        residuals = y - y_pred
        tracking_error = np.std(residuals)
        information_ratio = alpha / tracking_error if tracking_error > 0 else np.nan
        
        # Annualize information ratio
        info_ratio_annual = information_ratio * np.sqrt(12) if not np.isnan(information_ratio) else np.nan
    else:
        alpha = np.nan
        beta = np.nan
        information_ratio = np.nan
        info_ratio_annual = np.nan
    
    return {
        'Mean (annualized)': mean_annual,
        'Volatility (annualized)': vol_annual,
        'Sharpe Ratio (annualized)': sharpe_annual,
        'Max Drawdown': max_drawdown,
        'Market Alpha': alpha,
        'Market Beta': beta,
        'Information Ratio (annualized)': info_ratio_annual,
        'N observations': len(strategy_ret)
    }

# Build strategies for all three regressions
print("\nBuilding trading strategies...")

# Strategy 1: DP only
strategy1 = build_trading_strategy(reg1, signals, spy_excess, [dp_col], 'DP only')
stats1 = calculate_strategy_stats(strategy1, spy_excess) if strategy1 else None

# Strategy 2: EP only
strategy2 = build_trading_strategy(reg2, signals, spy_excess, [ep_col], 'EP only')
stats2 = calculate_strategy_stats(strategy2, spy_excess) if strategy2 else None

# Strategy 3: DP, EP, 10Y Yield
strategy3 = build_trading_strategy(reg3, signals, spy_excess, [dp_col, ep_col, yield_col], 'DP, EP, 10Y Yield')
stats3 = calculate_strategy_stats(strategy3, spy_excess) if strategy3 else None

# Display results
print("\n" + "="*80)
print("STRATEGY 1: DP only")
print("="*80)
if stats1:
    for key, value in stats1.items():
        if isinstance(value, float):
            print(f"{key}: {value:.6f}")
        else:
            print(f"{key}: {value}")
else:
    print("Strategy calculation failed")

print("\n" + "="*80)
print("STRATEGY 2: EP only")
print("="*80)
if stats2:
    for key, value in stats2.items():
        if isinstance(value, float):
            print(f"{key}: {value:.6f}")
        else:
            print(f"{key}: {value}")
else:
    print("Strategy calculation failed")

print("\n" + "="*80)
print("STRATEGY 3: DP, EP, 10Y Yield")
print("="*80)
if stats3:
    for key, value in stats3.items():
        if isinstance(value, float):
            print(f"{key}: {value:.6f}")
        else:
            print(f"{key}: {value}")
else:
    print("Strategy calculation failed")

# Summary table
print("\n" + "="*80)
print("SUMMARY TABLE: Trading Strategy Statistics")
print("="*80)
summary_data = []
if stats1:
    summary_data.append({
        'Strategy': 'DP only',
        'Mean (annualized)': stats1['Mean (annualized)'],
        'Volatility (annualized)': stats1['Volatility (annualized)'],
        'Sharpe Ratio': stats1['Sharpe Ratio (annualized)'],
        'Max Drawdown': stats1['Max Drawdown'],
        'Market Alpha': stats1['Market Alpha'],
        'Market Beta': stats1['Market Beta'],
        'Info Ratio (annualized)': stats1['Information Ratio (annualized)']
    })
if stats2:
    summary_data.append({
        'Strategy': 'EP only',
        'Mean (annualized)': stats2['Mean (annualized)'],
        'Volatility (annualized)': stats2['Volatility (annualized)'],
        'Sharpe Ratio': stats2['Sharpe Ratio (annualized)'],
        'Max Drawdown': stats2['Max Drawdown'],
        'Market Alpha': stats2['Market Alpha'],
        'Market Beta': stats2['Market Beta'],
        'Info Ratio (annualized)': stats2['Information Ratio (annualized)']
    })
if stats3:
    summary_data.append({
        'Strategy': 'DP, EP, 10Y Yield',
        'Mean (annualized)': stats3['Mean (annualized)'],
        'Volatility (annualized)': stats3['Volatility (annualized)'],
        'Sharpe Ratio': stats3['Sharpe Ratio (annualized)'],
        'Max Drawdown': stats3['Max Drawdown'],
        'Market Alpha': stats3['Market Alpha'],
        'Market Beta': stats3['Market Beta'],
        'Info Ratio (annualized)': stats3['Information Ratio (annualized)']
    })

if summary_data:
    summary_df = pd.DataFrame(summary_data)
    print(summary_df.to_string(index=False))


TRADING STRATEGY FROM FORECASTS

Building trading strategies...

STRATEGY 1: DP only
Mean (annualized): 0.086599
Volatility (annualized): 0.158435
Sharpe Ratio (annualized): 0.546594
Max Drawdown: -0.707442
Market Alpha: 0.006809
Market Beta: 0.059071
Information Ratio (annualized): 0.517326
N observations: 346

STRATEGY 2: EP only
Mean (annualized): 0.073108
Volatility (annualized): 0.131976
Sharpe Ratio (annualized): 0.553950
Max Drawdown: -0.579859
Market Alpha: 0.005939
Market Beta: 0.022191
Information Ratio (annualized): 0.540994
N observations: 346

STRATEGY 3: DP, EP, 10Y Yield
Mean (annualized): 0.093602
Volatility (annualized): 0.156400
Sharpe Ratio (annualized): 0.598479
Max Drawdown: -0.645193
Market Alpha: 0.007552
Market Beta: 0.035999
Information Ratio (annualized): 0.580627
N observations: 346

SUMMARY TABLE: Trading Strategy Statistics
         Strategy  Mean (annualized)  Volatility (annualized)  Sharpe Ratio  Max Drawdown  Market Alpha  Market Beta  Info Ratio (annua

In [11]:
# ============================================================================
# 3. Risk Characteristics
# ============================================================================

print("="*80)
print("RISK CHARACTERISTICS ANALYSIS")
print("="*80)

# ============================================================================
# 1. Compute monthly VaR at π = 0.05 (5th percentile) for strategies, market, and GMO
# ============================================================================

def calculate_var(returns, percentile=0.05):
    """Calculate Value at Risk (VaR) using historical quantile."""
    if len(returns) == 0:
        return np.nan
    return returns.quantile(percentile)

# Get market returns (SPY excess returns)
market_returns = spy_excess

# Get GMO returns (GMWAX excess returns)
gmo_returns = excess_returns['GMWAX'].dropna()

# Calculate VaR for each
print("\n" + "="*80)
print("1. Monthly VaR at π = 0.05 (5th percentile)")
print("="*80)

var_market = calculate_var(market_returns, 0.05)
var_gmo = calculate_var(gmo_returns, 0.05)

print(f"Market (SPY): {var_market:.6f} ({var_market*100:.2f}%)")
print(f"GMO (GMWAX): {var_gmo:.6f} ({var_gmo*100:.2f}%)")

# Strategy VaRs
if strategy1:
    var_strategy1 = calculate_var(strategy1['strategy_returns'], 0.05)
    print(f"Strategy 1 (DP only): {var_strategy1:.6f} ({var_strategy1*100:.2f}%)")
else:
    var_strategy1 = np.nan

if strategy2:
    var_strategy2 = calculate_var(strategy2['strategy_returns'], 0.05)
    print(f"Strategy 2 (EP only): {var_strategy2:.6f} ({var_strategy2*100:.2f}%)")
else:
    var_strategy2 = np.nan

if strategy3:
    var_strategy3 = calculate_var(strategy3['strategy_returns'], 0.05)
    print(f"Strategy 3 (DP, EP, 10Y Yield): {var_strategy3:.6f} ({var_strategy3*100:.2f}%)")
else:
    var_strategy3 = np.nan

# ============================================================================
# 2. Check if dynamic portfolio underperforms risk-free rate from 2000-2011
# ============================================================================

print("\n" + "="*80)
print("2. Dynamic Portfolio Performance vs Risk-Free Rate (2000-2011)")
print("="*80)

# Define period
start_2000 = pd.Timestamp('2000-01-01')
end_2011 = pd.Timestamp('2011-12-31')

# Get risk-free rate for this period
rf_period = risk_free_rate['TBill 3M'].loc[(risk_free_rate.index >= start_2000) & 
                                            (risk_free_rate.index <= end_2011)]

# Convert to monthly if annualized (check if values > 0.1)
if rf_period.max() > 0.1:
    rf_monthly = rf_period / 12
else:
    rf_monthly = rf_period

print(f"\nPeriod: {start_2000.date()} to {end_2011.date()}")
print(f"Risk-free rate (monthly, average): {rf_monthly.mean():.6f} ({rf_monthly.mean()*100:.2f}%)")

# Check each strategy
strategies_to_check = [
    (strategy1, 'Strategy 1 (DP only)'),
    (strategy2, 'Strategy 2 (EP only)'),
    (strategy3, 'Strategy 3 (DP, EP, 10Y Yield)')
]

for strategy, name in strategies_to_check:
    if strategy:
        strategy_ret = strategy['strategy_returns']
        strategy_period = strategy_ret.loc[(strategy_ret.index >= start_2000) & 
                                          (strategy_ret.index <= end_2011)]
        
        if len(strategy_period) > 0:
            # Align with risk-free rate
            common_idx = strategy_period.index.intersection(rf_monthly.index)
            strategy_aligned = strategy_period.loc[common_idx]
            rf_aligned = rf_monthly.loc[common_idx]
            
            # Calculate excess return over risk-free rate
            excess_over_rf = strategy_aligned - rf_aligned
            mean_excess = excess_over_rf.mean()
            total_excess = (1 + excess_over_rf).prod() - 1  # Cumulative excess return
            
            print(f"\n{name}:")
            print(f"  Mean monthly return: {strategy_aligned.mean():.6f} ({strategy_aligned.mean()*100:.2f}%)")
            print(f"  Mean excess over RF: {mean_excess:.6f} ({mean_excess*100:.2f}%)")
            print(f"  Cumulative excess return: {total_excess:.6f} ({total_excess*100:.2f}%)")
            
            if mean_excess < 0:
                print(f"  ✓ Underperforms risk-free rate (negative excess return)")
            else:
                print(f"  ✗ Outperforms risk-free rate (positive excess return)")

# ============================================================================
# 3. Count periods with negative risk premium based on regression estimates
# ============================================================================

print("\n" + "="*80)
print("3. Periods with Negative Risk Premium (based on regression forecasts)")
print("="*80)

def count_negative_premiums(reg_result, signals_df, X_names):
    """Count how many periods have negative forecasted risk premium."""
    if reg_result is None:
        return None
    
    alpha = reg_result['Alpha']
    beta_dict = reg_result['Beta']
    
    # Align data
    common_idx = signals_df.index.intersection(spy_excess.index)
    signals_aligned = signals_df.loc[common_idx, X_names]
    
    # Drop NaN
    valid_mask = ~signals_aligned.isna().any(axis=1)
    signals_clean = signals_aligned[valid_mask]
    
    if len(signals_clean) == 0:
        return None
    
    # Build forecasts: r̂^{SPY}_{t+1} = α + β'X_t
    forecasts = alpha + np.sum([beta_dict[name] * signals_clean[name].values 
                                for name in X_names], axis=0)
    
    # Count negative forecasts (negative risk premium)
    negative_count = np.sum(forecasts < 0)
    total_count = len(forecasts)
    negative_pct = (negative_count / total_count) * 100 if total_count > 0 else 0
    
    return {
        'negative_count': negative_count,
        'total_count': total_count,
        'negative_pct': negative_pct,
        'forecasts': pd.Series(forecasts, index=signals_clean.index)
    }

# Count for each regression
neg1 = count_negative_premiums(reg1, signals, [dp_col])
neg2 = count_negative_premiums(reg2, signals, [ep_col])
neg3 = count_negative_premiums(reg3, signals, [dp_col, ep_col, yield_col])

if neg1:
    print(f"\nStrategy 1 (DP only):")
    print(f"  Periods with negative risk premium: {neg1['negative_count']} out of {neg1['total_count']} ({neg1['negative_pct']:.2f}%)")

if neg2:
    print(f"\nStrategy 2 (EP only):")
    print(f"  Periods with negative risk premium: {neg2['negative_count']} out of {neg2['total_count']} ({neg2['negative_pct']:.2f}%)")

if neg3:
    print(f"\nStrategy 3 (DP, EP, 10Y Yield):")
    print(f"  Periods with negative risk premium: {neg3['negative_count']} out of {neg3['total_count']} ({neg3['negative_pct']:.2f}%)")

# ============================================================================
# 4. Assess if dynamic strategy takes on extra risk
# ============================================================================

print("\n" + "="*80)
print("4. Does the Dynamic Strategy Take on Extra Risk?")
print("="*80)

print("\nRisk Comparison:")
print("-" * 80)

# Compare volatilities
if stats1 and stats2 and stats3:
    print(f"\nVolatility (annualized):")
    print(f"  Market (SPY): {market_returns.std() * np.sqrt(12):.6f} ({market_returns.std() * np.sqrt(12)*100:.2f}%)")
    print(f"  Strategy 1 (DP only): {stats1['Volatility (annualized)']:.6f} ({stats1['Volatility (annualized)']*100:.2f}%)")
    print(f"  Strategy 2 (EP only): {stats2['Volatility (annualized)']:.6f} ({stats2['Volatility (annualized)']*100:.2f}%)")
    print(f"  Strategy 3 (DP, EP, 10Y Yield): {stats3['Volatility (annualized)']:.6f} ({stats3['Volatility (annualized)']*100:.2f}%)")
    
    # Compare VaRs
    print(f"\nVaR (5th percentile):")
    print(f"  Market (SPY): {var_market:.6f} ({var_market*100:.2f}%)")
    print(f"  Strategy 1 (DP only): {var_strategy1:.6f} ({var_strategy1*100:.2f}%)")
    print(f"  Strategy 2 (EP only): {var_strategy2:.6f} ({var_strategy2*100:.2f}%)")
    print(f"  Strategy 3 (DP, EP, 10Y Yield): {var_strategy3:.6f} ({var_strategy3*100:.2f}%)")
    
    # Compare max drawdowns
    market_cumulative = (1 + market_returns).cumprod()
    market_running_max = market_cumulative.expanding().max()
    market_drawdown = (market_cumulative - market_running_max) / market_running_max
    market_max_dd = market_drawdown.min()
    
    print(f"\nMaximum Drawdown:")
    print(f"  Market (SPY): {market_max_dd:.6f} ({market_max_dd*100:.2f}%)")
    print(f"  Strategy 1 (DP only): {stats1['Max Drawdown']:.6f} ({stats1['Max Drawdown']*100:.2f}%)")
    print(f"  Strategy 2 (EP only): {stats2['Max Drawdown']:.6f} ({stats2['Max Drawdown']*100:.2f}%)")
    print(f"  Strategy 3 (DP, EP, 10Y Yield): {stats3['Max Drawdown']:.6f} ({stats3['Max Drawdown']*100:.2f}%)")
    
    # Compare betas
    print(f"\nMarket Beta:")
    print(f"  Strategy 1 (DP only): {stats1['Market Beta']:.4f}")
    print(f"  Strategy 2 (EP only): {stats2['Market Beta']:.4f}")
    print(f"  Strategy 3 (DP, EP, 10Y Yield): {stats3['Market Beta']:.4f}")
    print(f"  (Market beta = 1.0 by definition)")

print("\n" + "="*80)
print("ASSESSMENT:")
print("="*80)
print("\nThe dynamic strategy takes on EXTRA RISK if:")
print("  - Volatility is higher than the market")
print("  - VaR is more negative (worse) than the market")
print("  - Maximum drawdown is larger (more negative) than the market")
print("  - Market beta is greater than 1.0")
print("\nCompare the metrics above to assess whether each strategy takes on extra risk.")


RISK CHARACTERISTICS ANALYSIS

1. Monthly VaR at π = 0.05 (5th percentile)
Market (SPY): -0.078272 (-7.83%)
GMO (GMWAX): -0.041147 (-4.11%)
Strategy 1 (DP only): -0.050837 (-5.08%)
Strategy 2 (EP only): -0.048324 (-4.83%)
Strategy 3 (DP, EP, 10Y Yield): -0.052337 (-5.23%)

2. Dynamic Portfolio Performance vs Risk-Free Rate (2000-2011)

Period: 2000-01-01 to 2011-12-31
Risk-free rate (monthly, average): 0.001906 (0.19%)

Strategy 1 (DP only):
  Mean monthly return: 0.004065 (0.41%)
  Mean excess over RF: 0.002158 (0.22%)
  Cumulative excess return: 0.040164 (4.02%)
  ✗ Outperforms risk-free rate (positive excess return)

Strategy 2 (EP only):
  Mean monthly return: 0.002383 (0.24%)
  Mean excess over RF: 0.000476 (0.05%)
  Cumulative excess return: -0.079161 (-7.92%)
  ✗ Outperforms risk-free rate (positive excess return)

Strategy 3 (DP, EP, 10Y Yield):
  Mean monthly return: 0.004703 (0.47%)
  Mean excess over RF: 0.002796 (0.28%)
  Cumulative excess return: 0.205549 (20.55%)
  ✗ Outp

In [12]:
# ============================================================================
# 4. Out-of-Sample Forecasting
# Focus on using both DP and EP as signals
# ============================================================================

print("="*80)
print("OUT-OF-SAMPLE FORECASTING")
print("="*80)
print("\nUsing both DP and EP as signals for forecasting")

# Align data
common_idx = signals.index.intersection(spy_excess.index)
signals_aligned = signals.loc[common_idx, [dp_col, ep_col]]
spy_aligned = spy_excess.loc[common_idx]

# Sort by date to ensure proper ordering
signals_aligned = signals_aligned.sort_index()
spy_aligned = spy_aligned.sort_index()

# Remove any NaN rows
valid_mask = ~(signals_aligned.isna().any(axis=1) | spy_aligned.isna())
signals_clean = signals_aligned[valid_mask]
spy_clean = spy_aligned[valid_mask]

print(f"\nData range: {spy_clean.index.min().date()} to {spy_clean.index.max().date()}")
print(f"Total observations: {len(spy_clean)}")
print(f"Starting OOS at t=60 (observation {spy_clean.index[60]})")

# Initialize storage for forecasts and errors
forecast_errors = []
null_errors = []
forecast_values = []
null_forecast_values = []
actual_returns = []

# Rolling out-of-sample procedure
# Start at t=60 (0-indexed, so t=60 means we use observations 0-60 to forecast 61)
start_idx = 60
T = len(spy_clean)

print(f"\nRunning rolling OOS procedure from t={start_idx} to t={T-2}...")

for t in range(start_idx, T - 1):  # T-1 because we need t+1 to exist
    # Data through time t (for estimation)
    spy_train = spy_clean.iloc[:t+1]  # Observations 0 to t (inclusive)
    signals_train = signals_clean.iloc[:t+1]
    
    # X_t (predictors at time t) for forecasting
    x_t = signals_clean.iloc[t:t+1]  # Signals at time t
    
    # r^{SPY}_{t+1} (actual return at time t+1)
    r_t_plus_1 = spy_clean.iloc[t+1]
    
    # Drop NaN from training data
    train_mask = ~(signals_train.isna().any(axis=1) | spy_train.isna())
    signals_train_clean = signals_train[train_mask]
    spy_train_clean = spy_train[train_mask]
    
    if len(signals_train_clean) < 2:  # Need at least 2 observations
        continue
    
    # Estimate regression using data through time t
    # r^{SPY}_i = α + β'X_{i-1} + ε_i for i = 1 to t
    # We need to lag the signals
    signals_train_lagged = signals_train_clean.shift(1).iloc[1:]  # X_{i-1}
    spy_train_aligned = spy_train_clean.iloc[1:]  # r^{SPY}_i
    
    # Align after lag
    common_train_idx = signals_train_lagged.index.intersection(spy_train_aligned.index)
    signals_train_final = signals_train_lagged.loc[common_train_idx]
    spy_train_final = spy_train_aligned.loc[common_train_idx]
    
    if len(signals_train_final) < 2:
        continue
    
    # Fit regression
    X_train = signals_train_final.values
    y_train = spy_train_final.values
    
    reg = LinearRegression()
    reg.fit(X_train, y_train)
    
    # Forecast r^{SPY}_{t+1} using x_t
    x_t_values = x_t.values
    forecast = reg.intercept_ + np.dot(reg.coef_, x_t_values[0])
    
    # Forecast error
    forecast_error = r_t_plus_1 - forecast
    
    # Null forecast: mean of returns through time t
    null_forecast = spy_train_clean.mean()
    null_error = r_t_plus_1 - null_forecast
    
    # Store results
    forecast_errors.append(forecast_error)
    null_errors.append(null_error)
    forecast_values.append(forecast)
    null_forecast_values.append(null_forecast)
    actual_returns.append(r_t_plus_1)

# Convert to arrays
forecast_errors = np.array(forecast_errors)
null_errors = np.array(null_errors)

# Calculate out-of-sample R²
sum_squared_forecast_errors = np.sum(forecast_errors ** 2)
sum_squared_null_errors = np.sum(null_errors ** 2)

if sum_squared_null_errors > 0:
    r2_oos = 1 - (sum_squared_forecast_errors / sum_squared_null_errors)
else:
    r2_oos = np.nan

# Display results
print("\n" + "="*80)
print("OUT-OF-SAMPLE RESULTS")
print("="*80)
print(f"\nNumber of OOS forecasts: {len(forecast_errors)}")
print(f"Sum of squared forecast errors: {sum_squared_forecast_errors:.6f}")
print(f"Sum of squared null errors: {sum_squared_null_errors:.6f}")
print(f"\nOut-of-Sample R²: {r2_oos:.6f}")

if r2_oos > 0:
    print(f"\n✓ YES - The forecasting strategy produced a POSITIVE R²_OOS ({r2_oos:.6f})")
    print("  This means the model forecasts are better than the null (mean) forecast.")
elif r2_oos < 0:
    print(f"\n✗ NO - The forecasting strategy produced a NEGATIVE R²_OOS ({r2_oos:.6f})")
    print("  This means the model forecasts are worse than the null (mean) forecast.")
else:
    print(f"\nThe forecasting strategy produced R²_OOS = 0")
    print("  This means the model forecasts perform the same as the null (mean) forecast.")

# Additional statistics
print("\n" + "="*80)
print("ADDITIONAL STATISTICS")
print("="*80)
print(f"Mean forecast error: {np.mean(forecast_errors):.6f}")
print(f"RMSE (forecast): {np.sqrt(np.mean(forecast_errors**2)):.6f}")
print(f"Mean null error: {np.mean(null_errors):.6f}")
print(f"RMSE (null): {np.sqrt(np.mean(null_errors**2)):.6f}")

if np.sqrt(np.mean(forecast_errors**2)) < np.sqrt(np.mean(null_errors**2)):
    improvement = ((np.sqrt(np.mean(null_errors**2)) - np.sqrt(np.mean(forecast_errors**2))) / 
                   np.sqrt(np.mean(null_errors**2))) * 100
    print(f"\nForecast RMSE is {improvement:.2f}% lower than null forecast RMSE")
else:
    deterioration = ((np.sqrt(np.mean(forecast_errors**2)) - np.sqrt(np.mean(null_errors**2))) / 
                     np.sqrt(np.mean(null_errors**2))) * 100
    print(f"\nForecast RMSE is {deterioration:.2f}% higher than null forecast RMSE")


OUT-OF-SAMPLE FORECASTING

Using both DP and EP as signals for forecasting

Data range: 1996-12-31 to 2025-10-31
Total observations: 347
Starting OOS at t=60 (observation 2001-12-31 00:00:00)

Running rolling OOS procedure from t=60 to t=345...

OUT-OF-SAMPLE RESULTS

Number of OOS forecasts: 286
Sum of squared forecast errors: 0.551446
Sum of squared null errors: 0.527088

Out-of-Sample R²: -0.046212

✗ NO - The forecasting strategy produced a NEGATIVE R²_OOS (-0.046212)
  This means the model forecasts are worse than the null (mean) forecast.

ADDITIONAL STATISTICS
Mean forecast error: 0.000063
RMSE (forecast): 0.043910
Mean null error: 0.002869
RMSE (null): 0.042930

Forecast RMSE is 2.28% higher than null forecast RMSE


In [13]:
# ============================================================================
# 2. Redo 3.2 with OOS Forecasts
# Build trading strategy using out-of-sample forecasts and compare to in-sample
# ============================================================================

print("="*80)
print("TRADING STRATEGY FROM OUT-OF-SAMPLE FORECASTS")
print("="*80)

# We need to rebuild the OOS procedure to get forecasts and build the strategy
# The forecasts from the previous cell should be available, but let's rebuild to ensure we have everything

# Rebuild OOS forecasts and strategy returns
oos_forecasts_list = []
oos_weights_list = []
oos_strategy_returns_list = []
oos_actual_returns_list = []
oos_dates = []

# Align data (same as before)
common_idx = signals.index.intersection(spy_excess.index)
signals_aligned = signals.loc[common_idx, [dp_col, ep_col]]
spy_aligned = spy_excess.loc[common_idx]

signals_aligned = signals_aligned.sort_index()
spy_aligned = spy_aligned.sort_index()

valid_mask = ~(signals_aligned.isna().any(axis=1) | spy_aligned.isna())
signals_clean = signals_aligned[valid_mask]
spy_clean = spy_aligned[valid_mask]

start_idx = 60
T = len(spy_clean)

print(f"\nBuilding OOS trading strategy from t={start_idx} to t={T-2}...")

for t in range(start_idx, T - 1):
    # Data through time t (for estimation)
    spy_train = spy_clean.iloc[:t+1]
    signals_train = signals_clean.iloc[:t+1]
    
    # X_t (predictors at time t) for forecasting
    x_t = signals_clean.iloc[t:t+1]
    
    # r^{SPY}_{t+1} (actual return at time t+1)
    r_t_plus_1 = spy_clean.iloc[t+1]
    date_t_plus_1 = spy_clean.index[t+1]
    
    # Drop NaN from training data
    train_mask = ~(signals_train.isna().any(axis=1) | spy_train.isna())
    signals_train_clean = signals_train[train_mask]
    spy_train_clean = spy_train[train_mask]
    
    if len(signals_train_clean) < 2:
        continue
    
    # Estimate regression using data through time t (with lagging)
    signals_train_lagged = signals_train_clean.shift(1).iloc[1:]
    spy_train_aligned = spy_train_clean.iloc[1:]
    
    common_train_idx = signals_train_lagged.index.intersection(spy_train_aligned.index)
    signals_train_final = signals_train_lagged.loc[common_train_idx]
    spy_train_final = spy_train_aligned.loc[common_train_idx]
    
    if len(signals_train_final) < 2:
        continue
    
    # Fit regression
    X_train = signals_train_final.values
    y_train = spy_train_final.values
    
    reg = LinearRegression()
    reg.fit(X_train, y_train)
    
    # Forecast r^{SPY}_{t+1} using x_t
    x_t_values = x_t.values
    forecast = reg.intercept_ + np.dot(reg.coef_, x_t_values[0])
    
    # Portfolio weight: w_t = 100 * r̂^{SPY}_{t+1}
    weight = 100 * forecast
    
    # Strategy return: r^x_{t+1} = w_t * r^{SPY}_{t+1}
    strategy_return = weight * r_t_plus_1
    
    # Store results
    oos_forecasts_list.append(forecast)
    oos_weights_list.append(weight)
    oos_strategy_returns_list.append(strategy_return)
    oos_actual_returns_list.append(r_t_plus_1)
    oos_dates.append(date_t_plus_1)

# Create Series for OOS strategy
oos_strategy_returns = pd.Series(oos_strategy_returns_list, index=oos_dates)
oos_actual_returns_series = pd.Series(oos_actual_returns_list, index=oos_dates)

# Calculate OOS strategy statistics
def calculate_strategy_stats_oos(strategy_returns, market_returns):
    """Calculate statistics for OOS trading strategy."""
    if len(strategy_returns) == 0:
        return None
    
    # Mean, volatility, Sharpe (annualized)
    mean_monthly = strategy_returns.mean()
    vol_monthly = strategy_returns.std()
    sharpe_monthly = mean_monthly / vol_monthly if vol_monthly > 0 else np.nan
    
    mean_annual = mean_monthly * 12
    vol_annual = vol_monthly * np.sqrt(12)
    sharpe_annual = mean_annual / vol_annual if vol_annual > 0 else np.nan
    
    # Maximum drawdown
    cumulative = (1 + strategy_returns).cumprod()
    running_max = cumulative.expanding().max()
    drawdown = (cumulative - running_max) / running_max
    max_drawdown = drawdown.min()
    
    # Market alpha and beta
    # Align indices
    common_idx = strategy_returns.index.intersection(market_returns.index)
    strategy_aligned = strategy_returns.loc[common_idx]
    market_aligned = market_returns.loc[common_idx]
    
    if len(strategy_aligned) > 0:
        X = market_aligned.values.reshape(-1, 1)
        y = strategy_aligned.values
        
        reg = LinearRegression()
        reg.fit(X, y)
        y_pred = reg.predict(X)
        
        alpha = reg.intercept_
        beta = reg.coef_[0]
        
        # Information ratio = alpha / tracking error
        residuals = y - y_pred
        tracking_error = np.std(residuals)
        information_ratio = alpha / tracking_error if tracking_error > 0 else np.nan
        info_ratio_annual = information_ratio * np.sqrt(12) if not np.isnan(information_ratio) else np.nan
    else:
        alpha = np.nan
        beta = np.nan
        info_ratio_annual = np.nan
    
    return {
        'Mean (annualized)': mean_annual,
        'Volatility (annualized)': vol_annual,
        'Sharpe Ratio (annualized)': sharpe_annual,
        'Max Drawdown': max_drawdown,
        'Market Alpha': alpha,
        'Market Beta': beta,
        'Information Ratio (annualized)': info_ratio_annual,
        'N observations': len(strategy_returns)
    }

# Calculate OOS strategy statistics
oos_stats = calculate_strategy_stats_oos(oos_strategy_returns, spy_excess)

# Get in-sample strategy statistics (Strategy 3 from section 3.2, which used DP and EP)
# Note: strategy3 should be available from the earlier code
in_sample_stats = stats3 if 'stats3' in globals() and stats3 else None

# Display results
print("\n" + "="*80)
print("OUT-OF-SAMPLE STRATEGY STATISTICS")
print("="*80)
if oos_stats:
    for key, value in oos_stats.items():
        if isinstance(value, float):
            print(f"{key}: {value:.6f}")
        else:
            print(f"{key}: {value}")

print("\n" + "="*80)
print("IN-SAMPLE STRATEGY STATISTICS (from section 3.2)")
print("="*80)
if in_sample_stats:
    for key, value in in_sample_stats.items():
        if isinstance(value, float):
            print(f"{key}: {value:.6f}")
        else:
            print(f"{key}: {value}")
else:
    print("In-sample statistics not available. Please run section 3.2 first.")

# Comparison
print("\n" + "="*80)
print("COMPARISON: OOS vs IN-SAMPLE STRATEGY")
print("="*80)

if oos_stats and in_sample_stats:
    print("\nMean Return (annualized):")
    print(f"  OOS Strategy: {oos_stats['Mean (annualized)']:.6f} ({oos_stats['Mean (annualized)']*100:.2f}%)")
    print(f"  In-Sample Strategy: {in_sample_stats['Mean (annualized)']:.6f} ({in_sample_stats['Mean (annualized)']*100:.2f}%)")
    mean_diff = oos_stats['Mean (annualized)'] - in_sample_stats['Mean (annualized)']
    print(f"  Difference: {mean_diff:+.6f} ({mean_diff*100:+.2f}%)")
    
    print("\nVolatility (annualized):")
    print(f"  OOS Strategy: {oos_stats['Volatility (annualized)']:.6f} ({oos_stats['Volatility (annualized)']*100:.2f}%)")
    print(f"  In-Sample Strategy: {in_sample_stats['Volatility (annualized)']:.6f} ({in_sample_stats['Volatility (annualized)']*100:.2f}%)")
    vol_diff = oos_stats['Volatility (annualized)'] - in_sample_stats['Volatility (annualized)']
    print(f"  Difference: {vol_diff:+.6f} ({vol_diff*100:+.2f}%)")
    
    print("\nSharpe Ratio (annualized):")
    print(f"  OOS Strategy: {oos_stats['Sharpe Ratio (annualized)']:.6f}")
    print(f"  In-Sample Strategy: {in_sample_stats['Sharpe Ratio (annualized)']:.6f}")
    sharpe_diff = oos_stats['Sharpe Ratio (annualized)'] - in_sample_stats['Sharpe Ratio (annualized)']
    print(f"  Difference: {sharpe_diff:+.6f}")
    
    print("\nMaximum Drawdown:")
    print(f"  OOS Strategy: {oos_stats['Max Drawdown']:.6f} ({oos_stats['Max Drawdown']*100:.2f}%)")
    print(f"  In-Sample Strategy: {in_sample_stats['Max Drawdown']:.6f} ({in_sample_stats['Max Drawdown']*100:.2f}%)")
    dd_diff = oos_stats['Max Drawdown'] - in_sample_stats['Max Drawdown']
    print(f"  Difference: {dd_diff:+.6f} ({dd_diff*100:+.2f}%)")
    
    print("\nMarket Alpha:")
    print(f"  OOS Strategy: {oos_stats['Market Alpha']:.6f}")
    print(f"  In-Sample Strategy: {in_sample_stats['Market Alpha']:.6f}")
    alpha_diff = oos_stats['Market Alpha'] - in_sample_stats['Market Alpha']
    print(f"  Difference: {alpha_diff:+.6f}")
    
    print("\nMarket Beta:")
    print(f"  OOS Strategy: {oos_stats['Market Beta']:.6f}")
    print(f"  In-Sample Strategy: {in_sample_stats['Market Beta']:.6f}")
    beta_diff = oos_stats['Market Beta'] - in_sample_stats['Market Beta']
    print(f"  Difference: {beta_diff:+.6f}")
    
    print("\nInformation Ratio (annualized):")
    print(f"  OOS Strategy: {oos_stats['Information Ratio (annualized)']:.6f}")
    print(f"  In-Sample Strategy: {in_sample_stats['Information Ratio (annualized)']:.6f}")
    ir_diff = oos_stats['Information Ratio (annualized)'] - in_sample_stats['Information Ratio (annualized)']
    print(f"  Difference: {ir_diff:+.6f}")
    

TRADING STRATEGY FROM OUT-OF-SAMPLE FORECASTS

Building OOS trading strategy from t=60 to t=345...

OUT-OF-SAMPLE STRATEGY STATISTICS
Mean (annualized): 0.037512
Volatility (annualized): 0.160524
Sharpe Ratio (annualized): 0.233686
Max Drawdown: -0.651383
Market Alpha: -0.001413
Market Beta: 0.618728
Information Ratio (annualized): -0.129042
N observations: 286

IN-SAMPLE STRATEGY STATISTICS (from section 3.2)
Mean (annualized): 0.093602
Volatility (annualized): 0.156400
Sharpe Ratio (annualized): 0.598479
Max Drawdown: -0.645193
Market Alpha: 0.007552
Market Beta: 0.035999
Information Ratio (annualized): 0.580627
N observations: 346

COMPARISON: OOS vs IN-SAMPLE STRATEGY

Mean Return (annualized):
  OOS Strategy: 0.037512 (3.75%)
  In-Sample Strategy: 0.093602 (9.36%)
  Difference: -0.056090 (-5.61%)

Volatility (annualized):
  OOS Strategy: 0.160524 (16.05%)
  In-Sample Strategy: 0.156400 (15.64%)
  Difference: +0.004125 (+0.41%)

Sharpe Ratio (annualized):
  OOS Strategy: 0.233686
 

In [14]:
# ============================================================================
# 3. Redo 3.3 with OOS Forecasts - Risk Characteristics
# Is the point-in-time (OOS) version of the strategy riskier?
# ============================================================================

print("="*80)
print("RISK CHARACTERISTICS: OOS STRATEGY vs IN-SAMPLE STRATEGY")
print("="*80)

# ============================================================================
# 1. Compute monthly VaR at π = 0.05 for OOS strategy
# ============================================================================

def calculate_var(returns, percentile=0.05):
    """Calculate Value at Risk (VaR) using historical quantile."""
    if len(returns) == 0:
        return np.nan
    return returns.quantile(percentile)

print("\n" + "="*80)
print("1. Monthly VaR at π = 0.05 (5th percentile)")
print("="*80)

# OOS strategy VaR
if 'oos_strategy_returns' in globals() and len(oos_strategy_returns) > 0:
    var_oos = calculate_var(oos_strategy_returns, 0.05)
    print(f"OOS Strategy: {var_oos:.6f} ({var_oos*100:.2f}%)")
else:
    var_oos = np.nan
    print("OOS Strategy: Not available")

# In-sample strategy VaR (from section 3.3)
if 'strategy3' in globals() and strategy3:
    var_in_sample = calculate_var(strategy3['strategy_returns'], 0.05)
    print(f"In-Sample Strategy: {var_in_sample:.6f} ({var_in_sample*100:.2f}%)")
else:
    var_in_sample = np.nan
    print("In-Sample Strategy: Not available")

# Market and GMO VaR (for reference)
market_returns = spy_excess
gmo_returns = excess_returns['GMWAX'].dropna()
var_market = calculate_var(market_returns, 0.05)
var_gmo = calculate_var(gmo_returns, 0.05)
print(f"Market (SPY): {var_market:.6f} ({var_market*100:.2f}%)")
print(f"GMO (GMWAX): {var_gmo:.6f} ({var_gmo*100:.2f}%)")

# ============================================================================
# 2. Check if OOS strategy underperforms risk-free rate from 2000-2011
# ============================================================================

print("\n" + "="*80)
print("2. OOS Strategy Performance vs Risk-Free Rate (2000-2011)")
print("="*80)

start_2000 = pd.Timestamp('2000-01-01')
end_2011 = pd.Timestamp('2011-12-31')

# Get risk-free rate for this period
rf_period = risk_free_rate['TBill 3M'].loc[(risk_free_rate.index >= start_2000) & 
                                            (risk_free_rate.index <= end_2011)]

# Convert to monthly if annualized
if rf_period.max() > 0.1:
    rf_monthly = rf_period / 12
else:
    rf_monthly = rf_period

print(f"\nPeriod: {start_2000.date()} to {end_2011.date()}")
print(f"Risk-free rate (monthly, average): {rf_monthly.mean():.6f} ({rf_monthly.mean()*100:.2f}%)")

# OOS strategy performance
if 'oos_strategy_returns' in globals() and len(oos_strategy_returns) > 0:
    oos_period = oos_strategy_returns.loc[(oos_strategy_returns.index >= start_2000) & 
                                          (oos_strategy_returns.index <= end_2011)]
    
    if len(oos_period) > 0:
        common_idx = oos_period.index.intersection(rf_monthly.index)
        oos_aligned = oos_period.loc[common_idx]
        rf_aligned = rf_monthly.loc[common_idx]
        
        excess_over_rf_oos = oos_aligned - rf_aligned
        mean_excess_oos = excess_over_rf_oos.mean()
        total_excess_oos = (1 + excess_over_rf_oos).prod() - 1
        
        print(f"\nOOS Strategy:")
        print(f"  Mean monthly return: {oos_aligned.mean():.6f} ({oos_aligned.mean()*100:.2f}%)")
        print(f"  Mean excess over RF: {mean_excess_oos:.6f} ({mean_excess_oos*100:.2f}%)")
        print(f"  Cumulative excess return: {total_excess_oos:.6f} ({total_excess_oos*100:.2f}%)")
        
        if mean_excess_oos < 0:
            print(f"  ✓ Underperforms risk-free rate")
        else:
            print(f"  ✗ Outperforms risk-free rate")

# In-sample strategy performance (for comparison)
if 'strategy3' in globals() and strategy3:
    in_sample_period = strategy3['strategy_returns'].loc[(strategy3['strategy_returns'].index >= start_2000) & 
                                                         (strategy3['strategy_returns'].index <= end_2011)]
    
    if len(in_sample_period) > 0:
        common_idx = in_sample_period.index.intersection(rf_monthly.index)
        in_sample_aligned = in_sample_period.loc[common_idx]
        rf_aligned = rf_monthly.loc[common_idx]
        
        excess_over_rf_in = in_sample_aligned - rf_aligned
        mean_excess_in = excess_over_rf_in.mean()
        total_excess_in = (1 + excess_over_rf_in).prod() - 1
        
        print(f"\nIn-Sample Strategy:")
        print(f"  Mean monthly return: {in_sample_aligned.mean():.6f} ({in_sample_aligned.mean()*100:.2f}%)")
        print(f"  Mean excess over RF: {mean_excess_in:.6f} ({mean_excess_in*100:.2f}%)")
        print(f"  Cumulative excess return: {total_excess_in:.6f} ({total_excess_in*100:.2f}%)")
        
        if mean_excess_in < 0:
            print(f"  ✓ Underperforms risk-free rate")
        else:
            print(f"  ✗ Outperforms risk-free rate")

# ============================================================================
# 3. Count periods with negative risk premium (based on OOS forecasts)
# ============================================================================

print("\n" + "="*80)
print("3. Periods with Negative Risk Premium (OOS forecasts)")
print("="*80)

# Count negative forecasts from OOS procedure
if 'oos_forecasts_list' in globals() and len(oos_forecasts_list) > 0:
    oos_forecasts_array = np.array(oos_forecasts_list)
    negative_count_oos = np.sum(oos_forecasts_array < 0)
    total_count_oos = len(oos_forecasts_array)
    negative_pct_oos = (negative_count_oos / total_count_oos) * 100 if total_count_oos > 0 else 0
    
    print(f"\nOOS Strategy:")
    print(f"  Periods with negative risk premium: {negative_count_oos} out of {total_count_oos} ({negative_pct_oos:.2f}%)")
else:
    print("\nOOS forecasts not available")

# In-sample comparison (from section 3.3)
if 'neg3' in globals() and neg3:
    print(f"\nIn-Sample Strategy (from section 3.3):")
    print(f"  Periods with negative risk premium: {neg3['negative_count']} out of {neg3['total_count']} ({neg3['negative_pct']:.2f}%)")

# ============================================================================
# 4. Assess if OOS strategy takes on extra risk (compared to in-sample and market)
# ============================================================================

print("\n" + "="*80)
print("4. Is the Point-in-Time (OOS) Strategy Riskier?")
print("="*80)

print("\nRisk Comparison:")
print("-" * 80)

# Get statistics
oos_stats_available = 'oos_stats' in globals() and oos_stats
in_sample_stats_available = 'stats3' in globals() and stats3

if oos_stats_available and in_sample_stats_available:
    print("\nVolatility (annualized):")
    print(f"  OOS Strategy: {oos_stats['Volatility (annualized)']:.6f} ({oos_stats['Volatility (annualized)']*100:.2f}%)")
    print(f"  In-Sample Strategy: {stats3['Volatility (annualized)']:.6f} ({stats3['Volatility (annualized)']*100:.2f}%)")
    print(f"  Market (SPY): {market_returns.std() * np.sqrt(12):.6f} ({market_returns.std() * np.sqrt(12)*100:.2f}%)")
    
    vol_diff = oos_stats['Volatility (annualized)'] - stats3['Volatility (annualized)']
    vol_diff_market = oos_stats['Volatility (annualized)'] - (market_returns.std() * np.sqrt(12))
    
    print(f"\n  OOS vs In-Sample: {vol_diff:+.6f} ({vol_diff*100:+.2f}%)")
    print(f"  OOS vs Market: {vol_diff_market:+.6f} ({vol_diff_market*100:+.2f}%)")
    
    print("\nVaR (5th percentile):")
    print(f"  OOS Strategy: {var_oos:.6f} ({var_oos*100:.2f}%)")
    print(f"  In-Sample Strategy: {var_in_sample:.6f} ({var_in_sample*100:.2f}%)")
    print(f"  Market (SPY): {var_market:.6f} ({var_market*100:.2f}%)")
    
    var_diff = var_oos - var_in_sample
    var_diff_market = var_oos - var_market
    
    print(f"\n  OOS vs In-Sample: {var_diff:+.6f} ({var_diff*100:+.2f}%)")
    print(f"  OOS vs Market: {var_diff_market:+.6f} ({var_diff_market*100:+.2f}%)")
    print(f"  (More negative = riskier)")
    
    print("\nMaximum Drawdown:")
    print(f"  OOS Strategy: {oos_stats['Max Drawdown']:.6f} ({oos_stats['Max Drawdown']*100:.2f}%)")
    print(f"  In-Sample Strategy: {stats3['Max Drawdown']:.6f} ({stats3['Max Drawdown']*100:.2f}%)")
    
    # Market max drawdown
    market_cumulative = (1 + market_returns).cumprod()
    market_running_max = market_cumulative.expanding().max()
    market_drawdown = (market_cumulative - market_running_max) / market_running_max
    market_max_dd = market_drawdown.min()
    
    print(f"  Market (SPY): {market_max_dd:.6f} ({market_max_dd*100:.2f}%)")
    
    dd_diff = oos_stats['Max Drawdown'] - stats3['Max Drawdown']
    dd_diff_market = oos_stats['Max Drawdown'] - market_max_dd
    
    print(f"\n  OOS vs In-Sample: {dd_diff:+.6f} ({dd_diff*100:+.2f}%)")
    print(f"  OOS vs Market: {dd_diff_market:+.6f} ({dd_diff_market*100:+.2f}%)")
    print(f"  (More negative = riskier)")
    
    print("\nMarket Beta:")
    print(f"  OOS Strategy: {oos_stats['Market Beta']:.6f}")
    print(f"  In-Sample Strategy: {stats3['Market Beta']:.6f}")
    print(f"  Market (SPY): 1.0 (by definition)")
    
    beta_diff = oos_stats['Market Beta'] - stats3['Market Beta']
    beta_diff_market = oos_stats['Market Beta'] - 1.0
    
    print(f"\n  OOS vs In-Sample: {beta_diff:+.6f}")
    print(f"  OOS vs Market: {beta_diff_market:+.6f}")
    print(f"  (Higher = riskier)")
    

RISK CHARACTERISTICS: OOS STRATEGY vs IN-SAMPLE STRATEGY

1. Monthly VaR at π = 0.05 (5th percentile)
OOS Strategy: -0.055928 (-5.59%)
In-Sample Strategy: -0.052337 (-5.23%)
Market (SPY): -0.078272 (-7.83%)
GMO (GMWAX): -0.041147 (-4.11%)

2. OOS Strategy Performance vs Risk-Free Rate (2000-2011)

Period: 2000-01-01 to 2011-12-31
Risk-free rate (monthly, average): 0.001906 (0.19%)

OOS Strategy:
  Mean monthly return: -0.000845 (-0.08%)
  Mean excess over RF: -0.002356 (-0.24%)
  Cumulative excess return: -0.431288 (-43.13%)
  ✓ Underperforms risk-free rate

In-Sample Strategy:
  Mean monthly return: 0.004703 (0.47%)
  Mean excess over RF: 0.002796 (0.28%)
  Cumulative excess return: 0.205549 (20.55%)
  ✗ Outperforms risk-free rate

3. Periods with Negative Risk Premium (OOS forecasts)

OOS Strategy:
  Periods with negative risk premium: 34 out of 286 (11.89%)

In-Sample Strategy (from section 3.3):
  Periods with negative risk premium: 39 out of 347 (11.24%)

4. Is the Point-in-Time (