# Calculating Betas for the Market's Stocks #

### Calculating the Betas for all the Stocks in the Universe ###

In [1]:
# Import Libraries

# Data Management
import pandas as pd
import numpy as np

# Plots
import matplotlib.pyplot as plt

# Statistics
from scipy.stats import norm

# Handle Files
import sys
import os

# Import Local Functions
sys.path.append(os.path.abspath("../source"))
from capm_toolkit import compute_excess_returns
from capm_toolkit import capm_regression
from capm_toolkit import rolling_capm_regression
from capm_toolkit import compute_factor_contributions
from capm_toolkit import compute_residual_returns

In [2]:
# Load the market-level inputs: the risk-free rate and the S&P 500 series.

# Folder holding the auxiliary CSV inputs. Built with os.path.join so the
# notebook also runs on systems where "\" is not a path separator.
data_dir = os.path.join("..", "additional_data")

# Risk-free rate: index by date (dates parsed day-first) and drop rows with
# missing values.
rfr = pd.read_csv(os.path.join(data_dir, "rfr.csv"))
rfr = rfr.set_index('Date')
rfr.index = pd.to_datetime(rfr.index, dayfirst=True)
rfr = rfr.dropna()

# S&P 500: index by date.
sp500 = pd.read_csv(os.path.join(data_dir, "sp500.csv"))
sp500 = sp500.set_index('Date')
# NOTE(review): parsed without dayfirst=True, unlike rfr above - confirm the
# two files really use different date formats.
sp500.index = pd.to_datetime(sp500.index)

In [3]:
# Load every stock CSV, keep its cleaned adjusted-close series, and store the
# ones with enough history.

# Folder with one CSV per stock. Built with os.path.join so the notebook also
# runs on systems where "\" is not a path separator.
folder_path = os.path.join("..", "stocks")

# Cleaning parameters
START_DATE = '2015-01-01'   # first date kept from each price history
MIN_ROWS = 2000             # minimum observations required after cleaning

# Dictionary to store the cleaned Series, keyed by file name without extension
dataframes = {}

# os.listdir returns entries in arbitrary order; sorting keeps the order of
# the dictionary (and of every DataFrame built from it) stable between runs.
for file in sorted(os.listdir(folder_path)):
    if not file.endswith(".csv"):
        continue

    # Full path to the file
    file_path = os.path.join(folder_path, file)

    # Read the CSV file and index it by date
    df = pd.read_csv(file_path)
    df = df.set_index("Date")
    df.index = pd.to_datetime(df.index)

    # Keep only the adjusted close, renamed to a common series name
    df = df['Adjusted_close'].rename("adj_close")

    # Fill NaNs by time-based interpolation on the full history, then keep
    # only the observations from START_DATE onwards
    df = df.interpolate(method='time')
    df = df.loc[START_DATE:]
    df = df.dropna()

    if len(df) >= MIN_ROWS:
        # File name without extension
        file_name = os.path.splitext(file)[0]

        # Save in the dictionary
        dataframes[file_name] = df
        print(f"File loaded: {file_name} ({len(df)} rows)")
    else:
        print(f"File skipped (less than {MIN_ROWS} rows after cleaning): {file}")

print(f"\nTotal files loaded: {len(dataframes)}")
print("Files loaded:", list(dataframes.keys()))

In [4]:
# Run the rolling CAPM regression for every stock and split the result into
# per-ticker alpha, beta and sigma series.

rolling_fits = {}

# One rolling regression per ticker against the S&P 500 and the risk-free rate
for ticker, prices in dataframes.items():
    rolling_fits[ticker] = rolling_capm_regression(
        prices,
        sp500['sp_500'],
        rfr['risk_free_rate']
    )

    print(f'{ticker} is ready.')

# Pull each output column into its own dictionary keyed by ticker
alphas_dict = {name: fit['alpha'] for name, fit in rolling_fits.items()}
betas_dict = {name: fit['beta'] for name, fit in rolling_fits.items()}
sigma_dict = {name: fit['sigma'] for name, fit in rolling_fits.items()}

In [5]:
# Assemble the rolling alphas into one DataFrame (one column per ticker)
alpha_df = pd.DataFrame(alphas_dict)

# Replace missing values in each column with that column's own mean
alpha_df = alpha_df.fillna(alpha_df.mean())

alpha_df

In [6]:
# Assemble the rolling betas into one DataFrame (one column per ticker)
betas_df = pd.DataFrame(betas_dict)

# Replace missing values in each column with that column's own mean
betas_df = betas_df.fillna(betas_df.mean())

betas_df

In [7]:
# Assemble the rolling sigmas into one DataFrame (one column per ticker)
sigma_df = pd.DataFrame(sigma_dict)

# Replace missing values in each column with that column's own mean
sigma_df = sigma_df.fillna(sigma_df.mean())

sigma_df

In [8]:
# Save the rolling alphas, betas and sigmas

# Output folder built with os.path.join so the files land in the intended
# directory on any operating system, not only where "\" separates paths.
output_dir = os.path.join("..", "additional_data")

alpha_df.to_csv(os.path.join(output_dir, "capm_halpha.csv"))
betas_df.to_csv(os.path.join(output_dir, "capm_hbetas.csv"))
sigma_df.to_csv(os.path.join(output_dir, "capm_hsigma.csv"))

In [9]:
# Build the simple-returns DataFrame: one-period percentage change of each
# price series, with the first (undefined) observation dropped.
returns_dict = {
    name: prices.pct_change(1).dropna()
    for name, prices in dataframes.items()
}

# Combine into one DataFrame and fill gaps with each column's own mean
df_returns = pd.DataFrame(returns_dict)
df_returns = df_returns.fillna(df_returns.mean())

df_returns

In [10]:
# Build the excess-returns DataFrame: one compute_excess_returns call per
# stock, using the risk-free rate series.
excess_returns_dict = {
    name: compute_excess_returns(prices, rfr['risk_free_rate'])
    for name, prices in dataframes.items()
}

# Combine into one DataFrame and fill gaps with each column's own mean
df_excess_returns = pd.DataFrame(excess_returns_dict)
df_excess_returns = df_excess_returns.fillna(df_excess_returns.mean())

df_excess_returns

In [11]:
# Market excess returns: the same helper applied to the S&P 500 series
market_series = sp500['sp_500']
risk_free_series = rfr['risk_free_rate']

market_excess_returns = compute_excess_returns(market_series, risk_free_series)

# Label the result so it is identifiable once combined with other data
market_excess_returns.name = 'market_excess_returns'

market_excess_returns

In [12]:
# Dates present in both the stock and the market excess returns
common_index = df_excess_returns.index.intersection(market_excess_returns.index)

# Restrict both objects to those shared dates
df_excess_returns = df_excess_returns.loc[common_index]
market_excess_returns = market_excess_returns.loc[common_index]

# Show the resulting shapes, stocks first and market second
for aligned in (df_excess_returns, market_excess_returns):
    print(aligned.shape)

In [13]:
# Estimate one beta per stock over the whole sample.

# Set the window to the full length of the aligned sample
window = len(df_excess_returns)

# Whole-sample betas go in their own dictionary. Writing them into
# `betas_dict` would overwrite the rolling beta Series stored there by the
# rolling-regression cell and corrupt `betas_df` if that cell were re-run.
capm_betas_dict = {}

# One regression per stock against the market excess returns
for ticker in df_excess_returns.columns:

    # Fit the model (called with WLS=True) over the full window
    model = capm_regression(
        df_excess_returns[ticker],
        market_excess_returns,
        window=window,
        WLS=True
    )

    # The second fitted parameter is taken as the beta; the first one (read
    # as the alpha in the original cell) is not needed here.
    beta = model.params.iloc[1]

    # Store Beta
    capm_betas_dict[ticker] = beta

# Create Beta Series (index: ticker, value: whole-sample beta)
betas_series = pd.Series(capm_betas_dict)

betas_series

In [14]:
# Plot one stock's rolling beta against its whole-sample beta and its mean
ticker = 'WMT'

# Mean of the rolling betas for the selected stock
mean = betas_df[ticker].mean()

# Build the figure on an explicit Axes
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(betas_df[ticker], label=f'{ticker} Beta', color='blue', alpha=0.7)

# Horizontal reference lines: whole-sample beta (red) and rolling mean (black)
ax.axhline(y=betas_series.loc[ticker], color='red', linestyle='dashed', label=f'{ticker} Historical Beta')
ax.axhline(y=mean, color='black', linestyle='dashed', label=f'{ticker} Mean Beta')

# Titles, axis labels and legend
ax.set_title('Beta Time Series')
ax.set_xlabel('Time')
ax.set_ylabel('Betas')
ax.legend()

# Render
plt.show()

In [15]:
# Compare the whole-sample beta with the annualized volatility of the stock
# currently held in `ticker` (set in the plotting cell above)

historical_beta = betas_series.loc[ticker]

# Daily standard deviation scaled by sqrt(252)
annualized_vol = df_returns[ticker].std() * np.sqrt(252)

# Beta per unit of annualized volatility
comparison = historical_beta / annualized_vol

print(f'{ticker} Historical Beta is: {historical_beta}')
print(f'{ticker} Historical Annualized Standard Deviation is: {annualized_vol}')
print(f'{ticker} Ratio Between Both is: {comparison}')

In [16]:
# Histogram of the whole-sample betas with a normal density overlaid.

# Mean and standard deviation of the betas across stocks
mu = betas_series.mean()
sigma = betas_series.std()

# Create Histogram (density=True so it is comparable with the pdf below)
plt.figure(figsize=(10, 6))
plt.hist(betas_series, bins=30, density=True, color='lightskyblue', alpha=0.5, edgecolor='black', label='Betas Distribution')

# Normal density with the same mean and standard deviation, evaluated over
# the observed range of betas
x = np.linspace(betas_series.min(), betas_series.max(), 100)
y = norm.pdf(x, mu, sigma)

# Overlay the normal density
plt.plot(x, y, color='black', linestyle='solid', linewidth=2, label='Normal Distribution')

# Reference lines: mean, median and one standard deviation either side.
# The labels name betas because that is the quantity plotted; the previous
# labels said "Returns".
plt.axvline(x=mu, color='black', linestyle='dashed', label='Mean Beta')
plt.axvline(x=betas_series.median(), color='red', linestyle='dashed', label='Median Beta')
plt.axvline(x=mu + sigma, color='grey', linestyle='dashed')
plt.axvline(x=mu - sigma, color='grey', linestyle='dashed')

# Config
plt.title('Betas Histogram with Normal Distribution')
plt.xlabel('Beta')
plt.ylabel('Density')

# Legends and Grid
plt.legend()
plt.grid(True)

# Show
plt.show()

### Comparing Residuals ###

In [27]:
# Residual returns for a single stock, computed from its rolling betas

stock = 'NVDA'

# Dates shared by the excess returns, the rolling betas and alphas, and the
# market excess returns
common_index = (
    df_excess_returns.index
    .intersection(betas_df.index)
    .intersection(alpha_df.index)
    .intersection(market_excess_returns.index)
)

# Restrict every input to the shared dates
r_i = df_excess_returns.loc[common_index, stock]
beta = betas_df.loc[common_index, stock]
r_m = market_excess_returns.loc[common_index]

# Factor contribution of the market (note: `r_f` here holds factor returns,
# not the risk-free rate)
r_f = compute_factor_contributions(r_m, beta)

# Residual returns from the stock, the market and the rolling beta
residual_returns = compute_residual_returns(r_i, r_m, beta)
residual_returns.name = 'residual_returns'

residual_returns

In [28]:
# Cumulative sums of the stock's excess, factor and residual returns
fig, ax = plt.subplots(figsize=(10, 6))

# Each series is drawn as a running total, in the same order as the legend
curves = [
    (r_i, f'{stock} Returns'),
    (r_f, f'{stock} Factor Returns'),
    (residual_returns, f'{stock} Residual Returns'),
]
for series, curve_label in curves:
    ax.plot(series.cumsum(), label=curve_label, alpha=0.7)

# Zero reference line
ax.axhline(y=0, color='black', linestyle='dashed')

# Titles, axis labels and legend
ax.set_title('Returns Time Series')
ax.set_xlabel('Time')
ax.set_ylabel('Returns')
ax.legend()

# Render
plt.show()

The method using the rolling betas adds considerable noise to the calculation of the residual returns, especially because the alpha coefficient is sometimes biased. We therefore prefer to use the residuals from the regression over the whole sample period.

In [24]:
# Sanity check: shapes of the three inputs to the residual calculation,
# printed in the order excess returns, market excess returns, rolling betas
for checked in (df_excess_returns, market_excess_returns, betas_df):
    print(checked.shape)

In [30]:
# Calculate the Residual Returns for every stock

# Recompute the shared dates in this cell instead of relying on whatever
# `common_index` an earlier cell left behind; the expression is the same one
# used for the single-stock residuals.
common_index = (
    df_excess_returns.index
    .intersection(betas_df.index)
    .intersection(alpha_df.index)
    .intersection(market_excess_returns.index)
)

residual_returns_dict = {}

# NOTE(review): this uses the rolling betas (betas_df), while the text above
# states a preference for the whole-sample regression - confirm which beta is
# intended here.
for ticker in dataframes.keys():
    # All three inputs are restricted to the same dates, as in the
    # single-stock calculation; the beta series was previously passed
    # without this restriction.
    residual_returns_dict[ticker] = compute_residual_returns(
        df_excess_returns[ticker].loc[common_index],
        market_excess_returns.loc[common_index],
        betas_df[ticker].loc[common_index]
    )

# Create the DataFrame (one column of residual returns per ticker)
df_residual_returns = pd.DataFrame.from_dict(residual_returns_dict)

df_residual_returns['NVDA']

In [31]:
# Save the whole-sample betas and the residual returns

# Output folder built with os.path.join so the files land in the intended
# directory on any operating system, not only where "\" separates paths.
output_dir = os.path.join("..", "additional_data")

betas_series.to_csv(os.path.join(output_dir, "capm_sbetas.csv"))
df_residual_returns.to_csv(os.path.join(output_dir, "capm_residual_returns.csv"))