# Building a Zero-Beta Portfolio #

In [2]:
# Import Libraries

# Data Management
import pandas as pd
import numpy as np

# Plots
import matplotlib.pyplot as plt

# Statistics
import statsmodels.api as sm

# Handle Files
import sys
import os

# Import Local Functions
sys.path.append(os.path.abspath("../source"))
from functions import import_stock_universe
from capm_toolkit import compute_daily_returns
from capm_toolkit import compute_excess_returns
from capm_toolkit import capm_regression
from portfolios_toolkit import zero_beta_weights


In [3]:
# Load the risk-free rate series and index it by date.
# Paths are assembled with os.path.join so the notebook also runs on
# Linux/macOS; a literal "..\additional_data\..." only resolves on Windows.
rfr = pd.read_csv(os.path.join("..", "additional_data", "rfr.csv"))
rfr = rfr.set_index('Date')
rfr.index = pd.to_datetime(rfr.index, dayfirst=True)  # dates are parsed day-first
rfr = rfr.dropna()  # discard rows with missing values

# Load the S&P 500 series and index it by date.
# NOTE(review): unlike rfr, the dates here are parsed without dayfirst=True —
# confirm that sp500.csv really uses a different date layout.
sp500 = pd.read_csv(os.path.join("..", "additional_data", "sp500.csv"))
sp500 = sp500.set_index('Date')
sp500.index = pd.to_datetime(sp500.index)

In [4]:
# Folder holding the stock files, built with os.path.join so the relative
# path is valid on POSIX systems as well as on Windows.
folder_path = os.path.join("..", "stocks")

# Mapping of ticker -> DataFrame for the whole stock universe (later cells
# iterate over dataframes.keys() and read the 'adj_close' column).
# Presumably the two lists are "source column" / "name after loading" —
# confirm against functions.import_stock_universe.
dataframes = import_stock_universe(
    folder_path,
    ['Adjusted_close'],
    ['adj_close']
)

In [5]:
# Display the loaded universe (one entry per ticker) as a sanity check.
dataframes

In [6]:
# Simple daily returns per ticker: one-period percentage change of the
# adjusted close, with NaN entries dropped.
returns_dict = {
    ticker: prices['adj_close'].pct_change(1).dropna()
    for ticker, prices in dataframes.items()
}

# One column per ticker; dates a ticker is missing are filled with that
# ticker's own mean return.
df_returns = pd.DataFrame.from_dict(returns_dict)
df_returns = df_returns.fillna(df_returns.mean())

df_returns

In [7]:
# Excess-return series per ticker, computed by compute_excess_returns from the
# adjusted close and the risk-free rate column.
excess_returns_dict = {
    ticker: compute_excess_returns(prices['adj_close'], rfr['risk_free_rate'])
    for ticker, prices in dataframes.items()
}

# One column per ticker; dates a ticker is missing are filled with that
# ticker's own mean excess return.
df_excess_returns = pd.DataFrame.from_dict(excess_returns_dict)
df_excess_returns = df_excess_returns.fillna(df_excess_returns.mean())

df_excess_returns

## Using Historical Beta (No Rebalancing) ##

In [8]:
# Collinearity check on the return correlation matrix: near-zero eigenvalues
# indicate (almost) linearly dependent return series.
corr_matrix = df_returns.corr().to_numpy()

# Only the eigenvalues are needed; the eigenvectors are discarded.
eigenvalues = np.linalg.eigh(corr_matrix)[0]

# Eigenvalues below this cut-off are counted as redundant directions.
threshold = 1e-4  # Adjust
num_redundant = (eigenvalues < threshold).sum()

print(f"Number of highly collinear variables: {num_redundant}")

In [9]:
# Market excess returns: the same compute_excess_returns helper used for the
# individual tickers, applied to the S&P 500 column and the risk-free rate.
market_excess_returns = compute_excess_returns(sp500['sp_500'], rfr['risk_free_rate'])

market_excess_returns

In [10]:
# Restrict every return series to the dates they all share.
# df_returns takes part in the intersection as well: it is built directly from
# the price files, so nothing guarantees it contains every date of the
# excess-return frame, and .loc with a missing label raises KeyError.
common_index = (
    df_excess_returns.index
    .intersection(market_excess_returns.index)
    .intersection(df_returns.index)
)

df_excess_returns = df_excess_returns.loc[common_index]
market_excess_returns = market_excess_returns.loc[common_index]
df_returns = df_returns.loc[common_index]

In [11]:
# Risk-free series passed through compute_daily_returns and restricted to the
# shared dates. Presumably this converts the quoted rate to a daily rate —
# confirm in capm_toolkit.
risk_free_daily = compute_daily_returns(rfr['risk_free_rate']).loc[common_index]

In [12]:
# Full-sample CAPM betas: each ticker's excess returns are regressed on the
# market's excess returns, with the window set to the whole shared sample.
window = len(common_index)

betas_dict = {}
for ticker in df_returns.columns:
    # NOTE(review): the meaning of the trailing True flag is defined in
    # capm_toolkit.capm_regression — confirm there.
    results = capm_regression(df_excess_returns[ticker], market_excess_returns, window, True)

    # The second fitted parameter is taken as the ticker's beta.
    betas_dict[ticker] = results.params.iloc[1]

# Collect the betas in a Series indexed by ticker.
betas_series = pd.Series(betas_dict, name='beta')

betas_series

In [13]:
# Sample covariance matrix of the (non-excess) daily returns, one row and
# column per ticker; used as the risk input for the portfolio weights.

cov_matrix = df_returns.cov()

cov_matrix
    

In [15]:
# Portfolio weights from zero_beta_weights (inputs: historical betas and the
# return covariance matrix), wrapped in a Series labelled by ticker.
zbp_weights = pd.Series(
    zero_beta_weights(betas_series, cov_matrix),
    index=betas_series.index,
    name='zbp_weights',
)

zbp_weights

In [16]:
# Shapes of the weight vector and the return matrix, one per line, to confirm
# they are conformable before multiplying.
print(zbp_weights.shape, df_returns.shape, sep="\n")

In [17]:
# Daily portfolio return = weighted sum of the asset returns (R @ w).
# DataFrame.dot matches weights to return columns by ticker label and raises
# if the labels differ, so a reordered or incomplete weight vector can no
# longer be multiplied silently against the wrong assets (the previous
# raw-ndarray product paired them purely by position).
zbp_returns = pd.Series(
    df_returns.dot(zbp_weights).to_numpy(),
    index = common_index,
    name = 'zbp_returns'
)

zbp_returns

In [18]:
# Regression inputs, scaled by 100:
#   y = portfolio return minus the daily risk-free series
#   x = market excess return (aligned on y's dates)
# Rows with a missing value in either column are dropped before scaling.
df_regression = (
    (zbp_returns - risk_free_daily)
    .to_frame('y')
    .assign(x=market_excess_returns)
    .dropna()
    .mul(100)
)

df_regression

In [19]:
# Running sums of the two regression series on a single axis.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df_regression['y'].cumsum(), label='Zero-Beta Portfolio Returns', color='red', alpha=0.7)
ax.plot(df_regression['x'].cumsum(), label='Benchmark Returns', color='blue', alpha=0.7)

# Titles, axis labels and legend
ax.set_title('ZBP vs Benchmark Returns Time Series')
ax.set_xlabel('Time')
ax.set_ylabel('Returns')
ax.legend()

# Render the figure
plt.show()

In [20]:
# Same two running sums, drawn on twin y-axes so each keeps its own scale.
market_cumulative = df_regression['x'].cumsum()
hedge_cumulative = df_regression['y'].cumsum()

fig, ax1 = plt.subplots(dpi = 300)

# Left axis: market series
market_cumulative.plot(color = 'blue', ax = ax1, alpha=0.5)
ax1.set_xlabel('Date')
ax1.set_ylabel('Market Returns', color='blue')

# Right axis (shares the x-axis): portfolio series
ax2 = ax1.twinx()
hedge_cumulative.plot(color = 'red', ax = ax2, alpha=0.8)
ax2.set_ylabel('Hedge Portfolio Returns', color='red')

plt.title('Returns vs Returns Time Series')
plt.show()

In [21]:
# Pairwise correlation between the two regression columns
# (y: portfolio excess return, x: market excess return).

df_regression.corr()

In [22]:
# Full-sample regression of the portfolio series on the market series:
# the window is set to the number of rows, so one fit covers everything.
window = len(df_regression)
results = capm_regression(df_regression['y'], df_regression['x'], window, True)

# Print the fitted model's summary table
print(results.summary())

In [58]:
# Rolling regression of the portfolio series on the market series.
# Number of consecutive observations in each estimation window
# (presumably one trading year of daily data — confirm).
window = 252

y = df_regression['y']
x = sm.add_constant(df_regression['x'])  # columns: 'const' and 'x'

# Per-window results, kept in step with one another
params = []         # fitted coefficients
index = []          # date label of each window
lower_bounds = []   # lower ends of the 95% confidence intervals
upper_bounds = []   # upper ends of the 95% confidence intervals

# `end` is the exclusive end of each slice, so a window covers rows
# [end - window, end). Letting `end` reach len(df_regression) makes the last
# window finish on the final observation instead of one row before it.
for end in range(window, len(df_regression) + 1):
    Y_window = y.iloc[end - window:end]
    X_window = x.iloc[end - window:end]

    # Fit the regression on this window
    model = capm_regression(
        Y_window,
        X_window,
        window,
        True,
    )

    # Store coefficients in the column order of x (const, x)
    params.append(model.params.values)
    # Label the estimate with the last date actually inside the window
    index.append(df_regression.index[end - 1])

    # Store lower and upper bounds of the 95% confidence intervals
    ci = model.conf_int(alpha=0.05)
    lower_bounds.append(ci.iloc[:, 0].values)  # first column: lower bound
    upper_bounds.append(ci.iloc[:, 1].values)  # second column: upper bound


In [59]:
# Turn the per-window lists into date-indexed frames that share the regressor
# names of x; the bound frames get a _lower / _upper suffix on each name.
coef_names = list(x.columns)

# Point estimates
parameters_df = pd.DataFrame(params, index=index, columns=coef_names)

# Lower confidence bounds
lower_df = pd.DataFrame(
    lower_bounds,
    index=index,
    columns=[f'{col}_lower' for col in coef_names],
)

# Upper confidence bounds
upper_df = pd.DataFrame(
    upper_bounds,
    index=index,
    columns=[f'{col}_upper' for col in coef_names],
)


In [60]:
# Rolling market beta with its 95% confidence band; the dashed line marks the
# zero-beta target.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(parameters_df['x'], label='Market Beta', color='black', alpha=0.7)
ax.fill_between(upper_df.index, lower_df['x_lower'], upper_df['x_upper'], color='skyblue', alpha=0.2, label='95% CI')
ax.axhline(y=0, color='black', linestyle='dashed')

# Titles, axis labels, legend and grid
ax.set_title('Market Beta Time Series')
ax.set_xlabel('Time')
ax.set_ylabel('Betas')
ax.legend()
ax.grid()

# Render the figure
plt.show()

In [61]:
# Persist the portfolio return series; the series name becomes the CSV column
# header. The path is built with os.path.join so the export also works on
# Linux/macOS (a literal "..\additional_data\..." is Windows-only).

zbp_returns.name = 'zero_beta_portfolio'
zbp_returns.to_csv(os.path.join("..", "additional_data", "zero_beta_portfolio.csv"), header=True)

Why are we not concerned about look-ahead bias?

In this analysis, we are not designing a tradable investment strategy, but rather constructing a theoretical portfolio to explore the structure of systematic and non-systematic risk. By using historical betas relative to a benchmark, we create a market-neutral portfolio—orthogonal to market risk—as a way to isolate potential sources of return that are not explained by the CAPM beta. Since the portfolio is not intended for ex-ante forecasting or backtesting purposes, but rather as a risk decomposition tool, the presence of look-ahead bias does not invalidate its use in this context.