# Calculating Betas for the Market's Stocks #

### Calculating the Betas for all the Stocks in the Universe ###

In [4]:
# Import Libraries

# Data Management
import pandas as pd
import numpy as np

# Plots
import matplotlib.pyplot as plt

# Statistics
import statsmodels.api as sm
from scipy.optimize import minimize
from scipy.stats import norm

# Manipulate Files
import os

# Pretty Notation
from IPython.display import display, Math

In [5]:
# Load the risk-free rate series: index by date (parsed day-first) and
# discard rows with missing values.
# Paths are built with os.path.join so the notebook is not tied to the
# Windows path separator.
rfr = pd.read_csv(os.path.join("..", "additional_data", "rfr.csv"))
rfr = rfr.set_index('Date')
rfr.index = pd.to_datetime(rfr.index, dayfirst=True)
rfr = rfr.dropna()

# Load the S&P 500 series and index it by date.
# NOTE(review): unlike the risk-free file, these dates are parsed without
# dayfirst=True -- confirm that the two files really use different formats.
sp500 = pd.read_csv(os.path.join("..", "additional_data", "sp500.csv"))
sp500 = sp500.set_index('Date')
sp500.index = pd.to_datetime(sp500.index)

In [6]:
# Create the Weights function
def wexp(N, half_life):
    """Return N exponentially decaying weights that sum to one.

    The raw weight at lag ``k`` (``k = 0 .. N-1``) is
    ``exp(k * ln(0.5) / half_life)``, i.e. it halves every ``half_life``
    steps. The normalized vector is returned reversed, so the smallest
    weight comes first and the largest (lag 0) comes last.
    """
    decay_rate = np.log(0.5) / half_life
    lags = np.arange(N)
    raw = np.exp(decay_rate * lags)
    normalized = raw / raw.sum()
    # Reverse so that the last position carries the heaviest weight
    return normalized[::-1]

# Create the CAPM 
def CAPM(
    stock_prices: pd.Series, 
    benchmark_prices: pd.Series = sp500['sp_500'], 
    risk_free_rate: pd.Series = rfr['risk_free_rate'], 
    window: int = 252,
    WLS: bool = True,
):
    """Estimate rolling CAPM alpha and beta for one stock.

    For every position ``t`` a regression of the stock's excess return on the
    benchmark's excess return is fitted on the ``window`` observations
    immediately before ``t`` (positions ``t-window .. t-1``); the estimate is
    labelled with the date at position ``t``.

    Parameters
    ----------
    stock_prices : pd.Series
        Price series of the stock, indexed by date.
    benchmark_prices : pd.Series
        Price series of the benchmark. The default is bound when this cell is
        executed, so the data-loading cell must have run first.
    risk_free_rate : pd.Series
        Annual rate in percent; converted to a daily rate with 360-day
        compounding. The default is bound when this cell is executed.
    window : int
        Number of observations in each regression.
    WLS : bool
        If True, fit weighted least squares with exponentially decaying
        weights (half-life ``window / 2``, rescaled to sum to ``window``);
        otherwise fit ordinary least squares.

    Returns
    -------
    pd.DataFrame
        Columns ``alpha`` and ``beta``, one row per fitted window. Windows
        containing a missing value are skipped and produce no row.
    """

    # Align time series to the same date range
    common_index = stock_prices.index.intersection(benchmark_prices.index).intersection(risk_free_rate.index)
    stock_prices = stock_prices.loc[common_index]
    benchmark_prices = benchmark_prices.loc[common_index]
    risk_free_rate = risk_free_rate.loc[common_index]

    # Compute daily returns
    stock_returns = stock_prices.pct_change(1)
    benchmark_returns = benchmark_prices.pct_change(1)
    risk_free_daily = (((1 + (risk_free_rate.div(100)))**(1/360)) - 1)  # Convert annual rate to daily

    # Excess returns
    excess_stock = stock_returns - risk_free_daily
    excess_benchmark = benchmark_returns - risk_free_daily

    # Position of every window that was actually fitted, kept alongside the
    # estimates so the output index always has the same length as the data
    # (a hard-coded slice breaks as soon as more than one window is skipped).
    positions, alphas, betas = [], [], []

    # Create weights with exponential decay
    weights = window * wexp(window, window/2)

    for t in range(window, len(stock_returns)):
        X = excess_benchmark.iloc[t-window:t]
        y = excess_stock.iloc[t-window:t]

        # Skip incomplete windows (the first one always contains the NaN
        # produced by pct_change)
        if X.isnull().any() or y.isnull().any():
            continue

        exog = sm.add_constant(X)

        if WLS:
            # Fit WLS regression
            model = sm.WLS(y, exog, weights=weights, missing='drop').fit()
        else:
            # Fit OLS regression
            model = sm.OLS(y, exog, missing='drop').fit()

        # Positional access: the constant comes first, the slope second
        params = model.params

        positions.append(t)
        alphas.append(params.iloc[0])
        betas.append(params.iloc[1])

    # Select the dates by position; an explicit integer dtype keeps the
    # empty case valid and preserves the index type and name.
    result_index = stock_returns.index[np.asarray(positions, dtype=np.intp)]

    parameters = pd.DataFrame({
        'alpha': alphas,
        'beta': betas,
    }, index=result_index)

    return parameters

In [7]:
# Load every stock CSV in the folder into a dictionary of adjusted-close Series
folder_path = os.path.join("..", "stocks")

# Minimum number of observations (after cleaning) required to keep a stock
MIN_ROWS = 2000

dataframes = {}

# Sort the listing: os.listdir returns entries in arbitrary order, and the
# dictionary order fixes the column order of every DataFrame built from it.
for file in sorted(os.listdir(folder_path)):
    if not file.endswith(".csv"):
        continue

    # Full path to the file
    file_path = os.path.join(folder_path, file)

    # Read the CSV file and index it by date
    prices = pd.read_csv(file_path).set_index("Date")
    prices.index = pd.to_datetime(prices.index)

    prices = (
        prices['Adjusted_close']
        .rename("adj_close")
        .interpolate(method='time')   # fill interior gaps, weighting by elapsed time
        .loc['2015-01-01':]           # keep observations from 2015 onwards
        .dropna()                     # drop anything the interpolation could not fill
    )

    if len(prices) >= MIN_ROWS:
        # File name without extension
        file_name = os.path.splitext(file)[0]

        # Store in the dictionary
        dataframes[file_name] = prices
        print(f"File loaded: {file_name} ({len(prices)} rows)")
    else:
        print(f"File skipped (less than {MIN_ROWS} rows after cleaning): {file}")

print(f"\nTotal files loaded: {len(dataframes)}")
print("Files loaded:", list(dataframes.keys()))

In [8]:
# Rolling CAPM beta of every loaded stock, keyed by ticker
betas_dict = {}

for ticker, prices in dataframes.items():
    # Only the beta column of the CAPM output is kept
    betas_dict[ticker] = CAPM(prices)['beta']

    print(f'{ticker} is ready.')

In [9]:
# Assemble the rolling betas into one DataFrame (one column per ticker)
betas_df = pd.DataFrame.from_dict(betas_dict)

# Replace each missing value with the mean of its own column
betas_df = betas_df.fillna(betas_df.mean())

betas_df

In [10]:
# Save the rolling betas
# (path built with os.path.join so it is not tied to the Windows separator)
betas_df.to_csv(os.path.join("..", "additional_data", "betas.csv"))

In [11]:
# Daily simple returns of every loaded stock, keyed by ticker
returns_dict = {
    ticker: prices.pct_change(1).dropna()
    for ticker, prices in dataframes.items()
}

# One column per ticker; each missing value is replaced by its column mean
df_returns = pd.DataFrame.from_dict(returns_dict)
df_returns = df_returns.fillna(df_returns.mean())

df_returns

In [12]:
# Inputs for the full-sample ("whole history") CAPM regressions

# Daily risk-free rate derived from the annual percentage rate (360-day compounding)
annual_rate = rfr['risk_free_rate'].div(100)
risk_free_daily = (1 + annual_rate) ** (1/360) - 1

# Daily simple returns of the market index
market_returns = sp500['sp_500'].pct_change(1).dropna()

# Restrict the three objects to the dates they all share
common_index = (
    df_returns.index
    .intersection(risk_free_daily.index)
    .intersection(market_returns.index)
)
df_returns = df_returns.loc[common_index]
risk_free_daily = risk_free_daily.loc[common_index]
market_returns = market_returns.loc[common_index]

In [13]:
# Exponential-decay weights spanning the whole sample
# (half-life of half the sample, rescaled to sum to the number of rows)
window = len(df_returns)
weights = window * wexp(window, window/2)

# Plot the weight assigned to each observation position
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(weights, label='Weights', color='black', alpha=0.7)

ax.set_title('Weights (no flip) Graph')
ax.set_xlabel('Index')
ax.set_ylabel('Weights')
ax.legend()

plt.show()

In [14]:
# Full-sample betas and (alpha + residual) series for every stock
betas_dict = {}
alpha_resids = {}   # collected per ticker and assembled once after the loop

# Market excess return is the same for every ticker
market_excess = market_returns - risk_free_daily

# Weights as a plain array so they can be masked row by row
weights_array = np.asarray(weights)

for ticker in df_returns.columns:
    data = pd.DataFrame({
        'y': df_returns[ticker] - risk_free_daily,
        'x': market_excess,
    })

    # Keep complete rows only, and drop the weights of the removed rows too so
    # the weight vector always has the same length as the regression sample
    complete = data.notna().all(axis=1).to_numpy()
    data = data.loc[complete]

    # Add constant (alpha term)
    X = sm.add_constant(data['x'])
    y = data['y']

    # Fit the WLS model
    model = sm.WLS(y, X, weights=weights_array[complete], missing='drop').fit()

    # Extract Alpha and Beta
    alpha = model.params['const']
    betas_dict[ticker] = model.params['x']

    # Alpha + residuals: the part of the excess return not explained by the market
    alpha_resids[ticker] = model.resid + alpha

# Build the DataFrame in one step instead of concatenating inside the loop
alpha_resids_df = pd.DataFrame(alpha_resids, index=df_returns.index)

# Create Beta Series
betas_series = pd.Series(betas_dict)

betas_series

In [15]:
# Display the alpha + residual series (one column per ticker, indexed by date)
alpha_resids_df

In [16]:
# Save the alpha + residual series and the full-sample betas
# (paths built with os.path.join so they are not tied to the Windows separator)
alpha_resids_df.to_csv(os.path.join("..", "additional_data", "capm_residuals.csv"))
betas_series.to_csv(os.path.join("..", "additional_data", "historical_betas.csv"))

In [17]:
# Rolling beta of one stock against its full-sample beta and its own mean
ticker = 'NVDA'

# Mean of the rolling beta series
mean = betas_df[ticker].mean()

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(betas_df[ticker], label=f'{ticker} Beta', color='blue', alpha=0.7)
ax.axhline(y=betas_series.loc[ticker], color='red', linestyle='dashed', label=f'{ticker} Historical Beta')
ax.axhline(y=mean, color='black', linestyle='dashed', label=f'{ticker} Mean Beta')

ax.set_title('Beta Time Series')
ax.set_xlabel('Time')
ax.set_ylabel('Betas')
ax.legend()

plt.show()

In [18]:
# Cumulative sum of one stock's returns next to the cumulative sum of its
# alpha + residual series
ticker = 'AAPL'

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df_returns[ticker].cumsum(), label=f'{ticker} Returns', alpha=0.7)
ax.plot(alpha_resids_df[ticker].cumsum(), label=f'{ticker} Residual Returns', alpha=0.7)
ax.axhline(y=0, color='black', linestyle='dashed')

ax.set_title('Returns Time Series')
ax.set_xlabel('Time')
ax.set_ylabel('Returns')
ax.legend()

plt.show()

In [19]:
# Relate one stock's full-sample beta to its annualized volatility.
# `ticker` is whatever the previous plotting cell left it set to.
historical_beta = betas_series.loc[ticker]

# Daily standard deviation scaled by the square root of 252
anualized_vol = np.sqrt(252) * df_returns[ticker].std()
comparison = historical_beta / anualized_vol

print(f'{ticker} Historical Beta is: {historical_beta}')
print(f'{ticker} Historical Annualized Standard Deviation is: {anualized_vol}')
print(f'{ticker} Ratio Between Both is: {comparison}')

In [20]:
# Cross-sectional distribution of the full-sample betas, compared with a
# normal density that has the same mean and standard deviation
mu = betas_series.mean()
sigma = betas_series.std()

fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(betas_series, bins=30, density=True, color='lightskyblue', alpha=0.5, edgecolor='black', label='Betas Distribution')

# Normal density evaluated over the observed range of betas
beta_grid = np.linspace(betas_series.min(), betas_series.max(), 100)
normal_density = norm.pdf(beta_grid, mu, sigma)
ax.plot(beta_grid, normal_density, color='black', linestyle='solid', linewidth=2, label='Normal Distribution')

# Reference lines: mean, median and one standard deviation either side.
# The labels refer to betas (the plotted quantity), not to returns.
ax.axvline(x=mu, color='black', linestyle='dashed', label='Mean Beta')
ax.axvline(x=betas_series.median(), color='red', linestyle='dashed', label='Median Beta')
ax.axvline(x=mu + sigma, color='grey', linestyle='dashed')
ax.axvline(x=mu - sigma, color='grey', linestyle='dashed')

ax.set_title('Betas Histogram with Normal Distribution')
ax.set_xlabel('Beta')
ax.set_ylabel('Density')

ax.legend()
ax.grid(True)

plt.show()