# Building a Zero-Beta Portfolio #

In [101]:
# Import Libraries

# Data Management
import pandas as pd
import numpy as np

# Plots
import matplotlib.pyplot as plt

# Statistics
import statsmodels.api as sm
from scipy.optimize import minimize

# Manipulate Files
import os

# Pretty Notation
from IPython.display import display, Math

In [102]:
# Load the reference series used throughout the notebook.
# Paths are assembled with os.path.join so the notebook also runs on
# non-Windows systems, where a backslash is not a path separator.
additional_data_dir = os.path.join("..", "additional_data")

# Risk-free rate, indexed by date (this file's dates are parsed day-first)
rfr = pd.read_csv(os.path.join(additional_data_dir, "rfr.csv"))
rfr = rfr.set_index('Date')
rfr.index = pd.to_datetime(rfr.index, dayfirst=True)
rfr = rfr.dropna()

# S&P 500, indexed by date
sp500 = pd.read_csv(os.path.join(additional_data_dir, "sp500.csv"))
sp500 = sp500.set_index('Date')
sp500.index = pd.to_datetime(sp500.index)

In [103]:
# Load every stock CSV in the folder into a dict of adjusted-close Series,
# keyed by file name without extension.
folder_path = os.path.join("..", "stocks")  # portable form of ..\stocks
start_date = '2015-01-01'   # first date kept in the sample
min_observations = 2000     # tickers with fewer rows after cleaning are skipped

dataframes = {}

# os.listdir returns entries in arbitrary order; sorting makes the load order
# (and the column order of anything built from this dict) reproducible.
for file in sorted(os.listdir(folder_path)):
    if not file.endswith(".csv"):
        continue

    # Full path to the file
    file_path = os.path.join(folder_path, file)

    # Read the CSV file and index it by date
    df = pd.read_csv(file_path)
    df = df.set_index("Date")
    df.index = pd.to_datetime(df.index)

    # Keep only the adjusted close, under a uniform name
    df = df['Adjusted_close'].rename("adj_close")

    # Fill NaNs by interpolating along the time index, then keep the sample
    # period and drop whatever is still missing
    df = df.interpolate(method='time')
    df = df.loc[start_date:].dropna()

    # Only keep tickers with enough history after cleaning
    if len(df) >= min_observations:
        # Store in the dictionary under the file name without extension
        file_name = os.path.splitext(file)[0]
        dataframes[file_name] = df

print(f"\nTotal files loaded: {len(dataframes)}")
print("Files loaded:", list(dataframes.keys()))

In [104]:
# Daily simple returns for every loaded ticker (first row dropped, since it
# has no previous price to compare against)
returns_dict = {
    ticker: prices.pct_change(1).dropna()
    for ticker, prices in dataframes.items()
}

# One column per ticker; any gap left in a column is replaced with that
# column's own mean return
df_returns = pd.DataFrame(returns_dict)
df_returns = df_returns.fillna(df_returns.mean())

df_returns

## Using Historical Beta (No Rebalancing) ##

In [106]:
# Correlation matrix of the asset returns, as a plain array
corr_matrix = df_returns.corr().to_numpy()

# Eigenvalues of the symmetric correlation matrix (eigenvectors are not needed)
eigenvalues = np.linalg.eigh(corr_matrix)[0]

# Count the eigenvalues below the threshold: each one signals a
# near-linear dependence among the assets (high collinearity)
threshold = 1e-4  # Adjust
num_redundant = (eigenvalues < threshold).sum()

print(f"Number of highly collinear variables: {num_redundant}")

In [107]:
# Daily risk-free rate: the quoted rate is divided by 100 and de-annualised
# with a 360-day exponent (assumes the column holds an annual rate in percent)
annual_risk_free = rfr['risk_free_rate'].div(100)
risk_free_daily = (1 + annual_risk_free) ** (1 / 360) - 1

# Daily simple returns of the market index
market_returns = sp500['sp_500'].pct_change(1).dropna()

In [108]:
# Dates that are present in all three series
common_index = (
    df_returns.index
    .intersection(risk_free_daily.index)
    .intersection(market_returns.index)
)

# Restrict every series to those shared dates
df_returns = df_returns.loc[common_index]
risk_free_daily = risk_free_daily.loc[common_index]
market_returns = market_returns.loc[common_index]

In [109]:
# Create the Weights function
def wexp(N, half_life):
    """Exponentially decaying weights that sum to one.

    The weight at position ``n`` (0-based) is proportional to
    ``0.5 ** (n / half_life)``. For a positive ``half_life`` position 0
    carries the largest weight and the weight halves every ``half_life``
    positions.

    NOTE(review): these weights are applied position-by-position to
    date-indexed data. If that data is sorted oldest-first, the OLDEST
    observation receives the largest weight -- confirm this is intended.

    Parameters
    ----------
    N : int
        Number of weights to generate.
    half_life : float
        Number of positions over which the weight halves. Must be non-zero.

    Returns
    -------
    numpy.ndarray
        Array of length ``N``; for ``N >= 1`` its entries sum to one.

    Raises
    ------
    ValueError
        If ``half_life`` is zero, because the decay rate is then undefined
        and the weights would silently come out as NaN.
    """
    if half_life == 0:
        raise ValueError("half_life must be non-zero")

    decay_rate = np.log(0.5) / half_life
    positions = np.arange(N)
    raw = np.exp(decay_rate * positions)
    return raw / raw.sum()

# One weight per observation with a half-life of half the sample; multiplying
# by the sample size rescales the weights so that they average to one
window = len(df_returns)
weights = wexp(window, window / 2) * window

In [110]:
# Estimate each asset's market beta with an exponentially weighted regression
# of its excess return on the market's excess return.
betas_dict = {}

# The weights are built here from the length of the returns table instead of
# reusing the global `weights`: that name is reassigned by later cells with
# other lengths (e.g. the 252-day rolling window), which would break a re-run.
# Assumes df_returns, risk_free_daily and market_returns already share one
# index (the "Common Index" cell).
n_obs = len(df_returns)
beta_weights = n_obs * wexp(n_obs, n_obs / 2)

# The market excess return is identical for every asset, so compute it once
market_excess = market_returns - risk_free_daily

# Loop over the assets to obtain the betas
for ticker in df_returns.columns:
    df = pd.DataFrame()
    df['y'] = df_returns[ticker] - risk_free_daily
    df['x'] = market_excess

    # Keep complete rows only, and keep each weight matched to its row so a
    # dropped observation cannot shift or mis-size the weight vector
    complete = df.notna().all(axis=1).to_numpy()
    df = df.loc[complete]

    # Add constant
    X = sm.add_constant(df['x'])
    y = df['y']

    # Fit the weighted regression
    model = sm.WLS(y, X, weights=beta_weights[complete], missing='drop').fit()

    # Store the slope on the market excess return
    betas_dict[ticker] = model.params['x']

# Create the Series
betas_series = pd.Series(betas_dict)

betas_series

In [111]:
# Now create the ZBP

def calculate_zbp(
    df_returns,
    historical_betas,
    ridge=0.0,
):
    """Return the return series of the minimum-variance zero-beta portfolio.

    The weights ``w`` solve ``min w' Sigma w`` subject to ``w' iota = 1``
    (fully invested) and ``w' beta = 0`` (no market exposure), where
    ``Sigma`` is the sample covariance of ``df_returns``. In closed form::

        w = (D / Delta) * Sigma^-1 iota - (E / Delta) * Sigma^-1 beta
        C = iota' Sigma^-1 iota,  D = beta' Sigma^-1 beta,
        E = beta' Sigma^-1 iota,  Delta = D * C - E**2

    Parameters
    ----------
    df_returns : pandas.DataFrame
        Asset returns, one column per asset.
    historical_betas : pandas.Series, pandas.DataFrame or array-like
        One beta per asset. A Series whose (unique) index contains every
        column of ``df_returns`` is aligned to the columns by label; anything
        else is flattened and used positionally.
    ridge : float, default 0.0
        Optional Tikhonov term added to the diagonal of ``Sigma`` before
        solving; useful when the covariance matrix is ill-conditioned.

    Returns
    -------
    pandas.Series
        Portfolio returns, indexed like ``df_returns``.

    Raises
    ------
    ValueError
        If the number of betas differs from the number of assets, or if the
        betas are (numerically) identical across assets, in which case the
        two constraints cannot both hold and ``Delta`` is zero.
    numpy.linalg.LinAlgError
        If the covariance matrix is singular.
    """
    n_assets = df_returns.shape[1]

    # Betas as a flat vector in the same order as the return columns.
    # Aligning by label prevents a silently wrong portfolio when the betas
    # arrive in a different order than the columns.
    if (
        isinstance(historical_betas, pd.Series)
        and historical_betas.index.is_unique
        and df_returns.columns.isin(historical_betas.index).all()
    ):
        beta = historical_betas.reindex(df_returns.columns).to_numpy(dtype=float)
    else:
        raw_betas = getattr(historical_betas, "values", historical_betas)
        beta = np.asarray(raw_betas, dtype=float).ravel()

    if beta.shape[0] != n_assets:
        raise ValueError(
            f"expected {n_assets} betas (one per column of df_returns), got {beta.shape[0]}"
        )

    # Covariance matrix, optionally regularised on the diagonal
    Sigma = df_returns.cov().to_numpy()
    if ridge:
        Sigma = Sigma + ridge * np.eye(n_assets)

    # Ones
    iota = np.ones(n_assets)

    # Solve Sigma @ X = [iota, beta] in one call instead of forming the
    # explicit inverse, which is less accurate for ill-conditioned matrices
    solved = np.linalg.solve(Sigma, np.column_stack((iota, beta)))
    sigma_inv_iota = solved[:, 0]
    sigma_inv_beta = solved[:, 1]

    # Scalar components of the closed-form solution
    C = iota @ sigma_inv_iota
    D = beta @ sigma_inv_beta
    E = beta @ sigma_inv_iota
    Delta = D * C - E * E

    # Delta vanishes when beta is proportional to iota; dividing by it would
    # silently produce inf/NaN weights
    if not np.isfinite(Delta) or abs(Delta) <= 1e-12 * abs(D * C):
        raise ValueError(
            "zero-beta portfolio is undefined: betas are identical across assets"
        )

    # Portfolio weights
    w = (D / Delta) * sigma_inv_iota - (E / Delta) * sigma_inv_beta

    # Portfolio returns: (T, N) @ (N,) -> (T,)
    zbp = df_returns.to_numpy() @ w

    return pd.Series(zbp, index=df_returns.index)
    

In [112]:
# Return series of the zero-beta portfolio built from the historical betas
zbp_returns = calculate_zbp(df_returns, betas_series).rename('ZBP')

zbp_returns

In [113]:
# Regression inputs: excess return of the portfolio (y) and of the market (x),
# restricted to complete rows and expressed in percent
df_regression = (
    (zbp_returns - risk_free_daily)
    .to_frame('y')
    .assign(x=market_returns - risk_free_daily)
    .dropna()
    .mul(100)
)

df_regression

In [114]:
# Cumulative sum of the excess returns (in percent) of portfolio and benchmark
fig, ax = plt.subplots(figsize=(10, 6))

series_to_draw = (
    ('y', 'Zero-Beta Portfolio Returns', 'red'),
    ('x', 'Benchmark Returns', 'blue'),
)
for column, label, color in series_to_draw:
    ax.plot(df_regression[column].cumsum(), label=label, color=color, alpha=0.7)

# Titles, axis labels and legend
ax.set_title('ZBP vs Benchmark Returns Time Series')
ax.set_xlabel('Time')
ax.set_ylabel('Returns')
ax.legend()

# Show
plt.show()

In [115]:
# Two-axis chart: market on the left axis, hedge portfolio on the right
cumulative = df_regression.cumsum()
fig, ax1 = plt.subplots(dpi=300)

# Left axis: cumulative market excess return
cumulative['x'].plot(ax=ax1, color='blue', alpha=0.5)
ax1.set_xlabel('Date')
ax1.set_ylabel('Market Returns', color='blue')

# Right axis (sharing the x axis): cumulative hedge portfolio excess return
ax2 = ax1.twinx()
cumulative['y'].plot(ax=ax2, color='red', alpha=0.8)
ax2.set_ylabel('Hedge Portfolio Returns', color='red')

plt.title('Returns vs Returns Time Series')
plt.show()

In [116]:
# Pearson correlation between the market (x) and the hedge portfolio (y)
# excess returns
df_regression.corr(method='pearson')

In [117]:
# Full-sample weighted regression of the portfolio on the market.
# One weight per row, half-life of half the sample, rescaled to average one.
window = len(df_regression)
weights = wexp(window, window / 2) * window

# Regressors: intercept plus the market excess return
regressors = sm.add_constant(df_regression['x'])
model = sm.WLS(df_regression['y'], regressors, weights=weights, missing='drop')

# Fit the model
results = model.fit()

# Print the regression summary
print(results.summary())

In [118]:
# Rolling weighted regression of the portfolio on the market.
# Window size (in rows) and the matching exponential weights.
window = 252
weights = window * wexp(window, window/2)

y = df_regression['y']
x = sm.add_constant(df_regression['x'])

# Lists to store the rolling coefficients, their dates and CI bounds
params = []
index = []
lower_bounds = []
upper_bounds = []

# `end` is the exclusive end of each window: the window covers rows
# [end - window, end) and its last observation is row end - 1. Iterating up to
# len(df_regression) inclusive lets the final window end on the last row;
# previously that window was skipped and every estimate was labelled with the
# date AFTER its window.
for end in range(window, len(df_regression) + 1):
    Y_window = y.iloc[end - window:end]
    X_window = x.iloc[end - window:end]

    # Fit WLS model
    model = sm.WLS(
        Y_window,
        X_window,
        missing='drop',
        weights=weights
    ).fit()

    # Store coefficients (const, x)
    params.append(model.params.values)
    index.append(df_regression.index[end - 1])  # Last date of the window

    # Store lower and upper bounds of 95% confidence intervals
    ci = model.conf_int(alpha=0.05)  # 95% CI
    lower_bounds.append(ci.iloc[:, 0].values)  # First column: lower bound
    upper_bounds.append(ci.iloc[:, 1].values)  # Second column: upper bound


In [119]:
# Coefficient paths: one row per window, one column per regressor
parameters_df = pd.DataFrame(params, index=index, columns=x.columns)

# Confidence-interval bounds, with the regressor names suffixed by the side
lower_df, upper_df = (
    pd.DataFrame(
        bounds,
        index=index,
        columns=[f'{col}_{side}' for col in x.columns],
    )
    for bounds, side in ((lower_bounds, 'lower'), (upper_bounds, 'upper'))
)


In [120]:
# Rolling market beta with its 95% confidence band and a zero reference line
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(parameters_df['x'], label='Market Beta', color='black', alpha=0.7)
ax.fill_between(
    upper_df.index,
    lower_df['x_lower'],
    upper_df['x_upper'],
    color='skyblue',
    alpha=0.2,
    label='95% CI',
)
ax.axhline(y=0, color='black', linestyle='dashed')

# Titles, axis labels, legend and grid
ax.set_title('Market Beta Time Series')
ax.set_xlabel('Time')
ax.set_ylabel('Betas')
ax.legend()
ax.grid()

# Show
plt.show()

In [121]:
# Store the zero-beta portfolio returns under a descriptive column name.
# os.path.join keeps the path valid on non-Windows systems, where the
# backslash form would be taken as one literal file name.
zbp_returns.name = 'zero_beta_portfolio'
zbp_returns.to_csv(os.path.join("..", "additional_data", "zero_beta.csv"), header=True)

Why are we not concerned about look-ahead bias?

In this analysis, we are not designing a tradable investment strategy, but rather constructing a theoretical portfolio to explore the structure of systematic and non-systematic risk. By using historical betas relative to a benchmark, we create a market-neutral portfolio—orthogonal to market risk—as a way to isolate potential sources of return that are not explained by the CAPM beta. Since the portfolio is not intended for ex-ante forecasting or backtesting purposes, but rather as a risk decomposition tool, the presence of look-ahead bias does not invalidate its use in this context.