# Calculating Betas for the Market's Stocks #

### Calculating the Betas for all the Stocks in the Universe ###

In [3]:
# Import Libraries

# Data Management
import pandas as pd
import numpy as np

# Plots
import matplotlib.pyplot as plt

# Statistics
from scipy.stats import norm

# Handle Files
import sys
import os

# Import Local Functions
sys.path.append(os.path.abspath("../source"))
from capm_toolkit import capm_regression
from capm_toolkit import rolling_capm_regression
from capm_toolkit import compute_factor_contributions
from capm_toolkit import compute_residual_returns

In [6]:
# Load the risk-free rate table: index it by parsed dates and drop rows with missing values.
# Forward slashes keep the relative paths valid on every OS (Windows included),
# matching the "../source" path used for the local imports.
rfr = pd.read_csv("../additional_data/rfr.csv").set_index('Date')
rfr.index = pd.to_datetime(rfr.index)
rfr = rfr.dropna()

# Load the S&P 500 table and keep only its 'sp_500' column as a date-indexed Series.
data_sp500 = pd.read_csv("../additional_data/sp500.csv").set_index('Date')
data_sp500.index = pd.to_datetime(data_sp500.index)
data_sp500 = data_sp500['sp_500']

# Later cells refer to the market series as `sp500`; bind that name here so the
# notebook runs top to bottom on a fresh kernel instead of raising a NameError.
sp500 = data_sp500

In [8]:
# Import Universe
# NOTE(review): this whole cell is commented out, so nothing here is executed.
# It calls `import_daily_financial_data`, which is not imported in this notebook.
# The next cell reads stocks_returns.csv instead — presumably the saved output of
# this download; TODO confirm before re-enabling or deleting this cell.
# tickers = pd.read_csv(r"..\additional_data\tickers.csv")
# tickers = tickers['tickers']

# Download all the data
# DataFrame to store everything
# df_returns = pd.DataFrame()

# for ticker in tickers:
#    try:
#        df = import_daily_financial_data(
#            ticker=ticker, 
#            start_date='2015-01-01', 
#            end_date='2025-01-01', 
#            returns=True
#        )
#        
#        returns = df['returns'].rename(ticker)
#        
#        df_returns = pd.concat([df_returns, returns], axis=1)
#        
#        print(f'Data Ready for {ticker}')
#        
#    except Exception as e:
#         print(f'Error reading {ticker}: {e}')

In [13]:
# Load the stock returns table. The first CSV column is unnamed ('Unnamed: 0' after
# parsing); it is renamed to 'Date' and becomes the datetime index.
# Forward slashes keep the relative path valid on every OS (Windows included).
df_returns = (
    pd.read_csv("../additional_data/stocks_returns.csv")
    .rename(columns={'Unnamed: 0': 'Date'})
    .set_index('Date')
)
df_returns.index = pd.to_datetime(df_returns.index)

df_returns

In [14]:
# The ticker universe is the set of column labels of the returns table
tickers = df_returns.columns

In [17]:
# Rolling CAPM estimates for every ticker, stored per ticker symbol
alphas_dict, betas_dict, sigma_dict = {}, {}, {}

for ticker in tickers:
    # Rolling regression of this stock against the market and the risk-free rate
    rolling_fit = rolling_capm_regression(df_returns[ticker], sp500, rfr['risk_free_rate'])

    # File each output column of the fit under the ticker in its own dictionary
    for store, column in ((alphas_dict, 'alpha'), (betas_dict, 'beta'), (sigma_dict, 'sigma')):
        store[ticker] = rolling_fit[column]

    print(f'{ticker} is ready.')

In [18]:
# Assemble the rolling alphas into one table, one column per ticker.
# Missing values are left untouched (no mean imputation is applied).
alpha_df = pd.DataFrame(alphas_dict)

alpha_df

In [19]:
# Assemble the rolling betas into one table, one column per ticker.
# Missing values are left untouched (no mean imputation is applied).
betas_df = pd.DataFrame(betas_dict)

betas_df

In [20]:
# Assemble the rolling sigmas into one table, one column per ticker.
# Missing values are left untouched (no mean imputation is applied).
sigma_df = pd.DataFrame(sigma_dict)

sigma_df

In [21]:
# Persist the rolling CAPM estimates (alphas, betas and sigmas), one CSV per table.
# Forward slashes keep the relative paths valid on every OS (Windows included).
alpha_df.to_csv("../additional_data/capm_halpha.csv")
betas_df.to_csv("../additional_data/capm_hbetas.csv")
sigma_df.to_csv("../additional_data/capm_hsigma.csv")

In [28]:
# Market excess return: the market series minus the risk-free rate, matched on the index
market_excess_returns = sp500.sub(rfr['risk_free_rate'])

market_excess_returns

In [29]:
# Stock excess returns: subtract the risk-free rate from every column, matching rows by index
df_excess_returns = df_returns.sub(rfr['risk_free_rate'], axis='index')

df_excess_returns

In [30]:
# Dates present in both the stock and the market excess-return objects
common_index = df_excess_returns.index.intersection(market_excess_returns.index)

# Restrict both objects to those shared dates
df_excess_returns = df_excess_returns.loc[common_index]
market_excess_returns = market_excess_returns.loc[common_index]

# Sanity check: both shapes should report the same number of rows
print(df_excess_returns.shape)
print(market_excess_returns.shape)

In [31]:
# Use every available observation: the window spans the whole sample
window = len(df_excess_returns)

# Full-sample beta per ticker. Kept separate from `betas_dict`, which holds the
# rolling beta Series built earlier and must not be overwritten with scalars
# (re-running the rolling-betas DataFrame cell would otherwise silently break).
capm_betas_dict = {}

# Loop to obtain one beta per stock
for ticker in df_excess_returns.columns:

    # Fit the weighted (WLS=True) CAPM regression of the stock on the market
    model = capm_regression(
        df_excess_returns[ticker],
        market_excess_returns,
        window=window,
        WLS=True
    )

    # Parameters are read by position: position 1 is stored as the beta
    # (position 0 was labelled alpha in the original code and is not needed here)
    capm_betas_dict[ticker] = model.params.iloc[1]

# Create Beta Series (index: ticker, value: full-sample beta)
betas_series = pd.Series(capm_betas_dict)

betas_series

In [32]:
# Ticker whose rolling beta is plotted
ticker = 'WMT'

# Rolling beta of that ticker and its average over time
rolling_beta = betas_df[ticker]
mean = rolling_beta.mean()

# Time series of the rolling beta
plt.figure(figsize=(10, 6))
plt.plot(rolling_beta, label=f'{ticker} Beta', color='blue', alpha=0.7)

# Horizontal references: full-sample beta (red) and rolling-beta average (black)
for level, colour, text in (
    (betas_series.loc[ticker], 'red', f'{ticker} Historical Beta'),
    (mean, 'black', f'{ticker} Mean Beta'),
):
    plt.axhline(y=level, color=colour, linestyle='dashed', label=text)

# Titles, axis labels and legend
plt.title('Beta Time Series')
plt.xlabel('Time')
plt.ylabel('Betas')
plt.legend()

plt.show()

In [33]:
# Relate one stock's full-sample beta to its annualized volatility.
# Uses whatever `ticker` currently holds; 252 is presumably the number of
# trading days per year used to annualize the daily standard deviation.
annualized_vol = np.sqrt(252) * df_returns[ticker].std()
comparison = betas_series.loc[ticker] / annualized_vol

# Report the three figures, one per line
for line in (
    f'{ticker} Historical Beta is: {betas_series.loc[ticker]}',
    f'{ticker} Historical Annualized Standard Deviation is: {annualized_vol}',
    f'{ticker} Ratio Between Both is: {comparison}',
):
    print(line)

In [34]:
# Mean and standard deviation of the full-sample betas across stocks
mu = betas_series.mean()
sigma = betas_series.std()

# Histogram of the betas, normalized to a density so it is comparable with the pdf below
plt.figure(figsize=(10, 6))
plt.hist(betas_series, bins=30, density=True, color='lightskyblue', alpha=0.5, edgecolor='black', label='Betas Distribution')

# Normal density with the same mean and standard deviation, over the observed beta range
x = np.linspace(betas_series.min(), betas_series.max(), 100)
y = norm.pdf(x, mu, sigma)

# Overlay the fitted normal curve
plt.plot(x, y, color='black', linestyle='solid', linewidth=2, label='Normal Distribution')

# Reference lines. The plotted quantity is beta, so the labels say "Beta"
# (they previously read "Returns", a leftover from a returns histogram).
plt.axvline(x=mu, color='black', linestyle='dashed', label='Mean Beta')
plt.axvline(x=betas_series.median(), color='red', linestyle='dashed', label='Median Beta')
plt.axvline(x=mu + sigma, color='grey', linestyle='dashed')
plt.axvline(x=mu - sigma, color='grey', linestyle='dashed')

# Config
plt.title('Betas Histogram with Normal Distribution')
plt.xlabel('Beta')
plt.ylabel('Density')

# Legends and Grid
plt.legend()
plt.grid(True)

# Show
plt.show()

### Comparing Residuals ###

In [42]:
# Residual returns for a single stock, computed from its rolling betas
stock = 'WMT'

# Dates shared by the excess returns, the rolling betas/alphas and the market series
common_index = (
    df_excess_returns.index
    .intersection(betas_df.index)
    .intersection(alpha_df.index)
    .intersection(market_excess_returns.index)
)

# Align the three inputs on those dates
r_i = df_excess_returns.loc[common_index, stock]
r_m = market_excess_returns.loc[common_index]
beta = betas_df.loc[common_index, stock]

# Factor contribution obtained from the market excess returns and the rolling beta
r_f = compute_factor_contributions(r_m, beta)

# Residual returns obtained from the same aligned inputs
residual_returns = compute_residual_returns(r_i, r_m, beta)
residual_returns.name = 'residual_returns'

residual_returns

In [43]:
# Cumulative sums of the stock's excess, factor and residual returns on one chart
plt.figure(figsize=(10, 6))

# Draw the three curves in a fixed order so each keeps its default colour
for series, label in (
    (r_i, f'{stock} Returns'),
    (r_f, f'{stock} Factor Returns'),
    (residual_returns, f'{stock} Residual Returns'),
):
    plt.plot(series.cumsum(), label=label, alpha=0.7)

# Zero line for reference
plt.axhline(y=0, color='black', linestyle='dashed')

# Titles, axis labels and legend
plt.title('Returns Time Series')
plt.xlabel('Time')
plt.ylabel('Returns')
plt.legend()

plt.show()

Computing the residual returns from the rolling betas adds a lot of noise, particularly because the alpha coefficient is sometimes biased. We therefore prefer the residuals from the regression over the whole sample period.

In [37]:
# Shapes of the inputs to the residual computation, printed one per line
for frame in (df_excess_returns, market_excess_returns, betas_df):
    print(frame.shape)

In [38]:
# Residual returns for every stock, computed from the rolling betas.
# NOTE(review): the markdown above states a preference for whole-sample residuals,
# while this cell feeds the rolling `betas_df` — confirm this is the intended input.
residual_returns_dict = {}

# Build the shared dates here rather than relying on whichever `common_index` an
# earlier cell happened to leave behind. All three inputs are then sliced on the
# same index (as in the single-stock cell), and `.loc` cannot hit a missing date.
residual_index = (
    df_excess_returns.index
    .intersection(market_excess_returns.index)
    .intersection(betas_df.index)
)

# Loop to obtain the residual returns of each stock
for ticker in tickers:
    residual_returns_dict[ticker] = compute_residual_returns(
        df_excess_returns[ticker].loc[residual_index],
        market_excess_returns.loc[residual_index],
        betas_df[ticker].loc[residual_index]
    )

# Create the DataFrame (one column per ticker)
df_residual_returns = pd.DataFrame.from_dict(residual_returns_dict)

df_residual_returns['NVDA']

In [39]:
# Persist the full-sample betas and the residual returns table.
# Forward slashes keep the relative paths valid on every OS (Windows included).
betas_series.to_csv("../additional_data/capm_sbetas.csv")
df_residual_returns.to_csv("../additional_data/capm_residual_returns.csv")