# Fama-MacBeth Regression #

### Market, Size, Value, Momentum and Beta ###

In [19]:
# Import Libraries

# Data Management
import pandas as pd
import numpy as np

# Plots
import matplotlib.pyplot as plt

# Statistics
import statsmodels.api as sm

# Handle Files
import sys
import os

# Import Local Functions
sys.path.append(os.path.abspath("../source"))
from functions import import_stock_universe
from factors_toolkit import fama_macbeth_significance_test

In [6]:
# Risk-free rate series, indexed by date.
# Paths are assembled with os.path.join: the previous literal "..\\additional_data\\..."
# form only resolves on Windows, where the backslash is a path separator.
rfr = pd.read_csv(os.path.join("..", "additional_data", "rfr.csv"))
rfr = rfr.set_index('Date')
# NOTE(review): only this file is parsed day-first; confirm the other CSVs
# really use a different date layout.
rfr.index = pd.to_datetime(rfr.index, dayfirst=True)

# S&P 500 series, indexed by date.
sp500 = pd.read_csv(os.path.join("..", "additional_data", "sp500.csv"))
sp500 = sp500.set_index('Date')
sp500.index = pd.to_datetime(sp500.index)

In [7]:
# Load the per-factor beta panels.
# Each CSV is read into a DataFrame indexed by its parsed 'Date' column; the
# remaining columns are used further down as stock identifiers.


def _load_beta_panel(filename):
    """Read one CSV from ../additional_data and index it by date.

    Parameters
    ----------
    filename : str
        File name inside the ``../additional_data`` folder.

    Returns
    -------
    pandas.DataFrame
        The file contents with the 'Date' column parsed by ``pd.to_datetime``
        and set as the index.
    """
    # os.path.join keeps the path valid on every OS; a hard-coded backslash
    # is only a separator on Windows.
    panel = pd.read_csv(os.path.join("..", "additional_data", filename))
    panel = panel.set_index('Date')
    panel.index = pd.to_datetime(panel.index)
    return panel


# Market Betas
carhart_mkt_betas = _load_beta_panel("carhart_market_betas.csv")

# SMB Betas
carhart_smb_betas = _load_beta_panel("carhart_size_betas.csv")

# HML Betas
carhart_hml_betas = _load_beta_panel("carhart_value_betas.csv")

# WML Betas
carhart_wml_betas = _load_beta_panel("carhart_mom_betas.csv")

# BAB Betas
carhart_bab_betas = _load_beta_panel("carhart_beta_betas.csv")

In [8]:
# Load the stock universe: `dataframes` is iterated below with .items() as
# (stock, DataFrame) pairs exposing 'adj_close' and 'mkt_cap' columns.
# The folder is built with os.path.join so it resolves on any OS (on Windows
# this yields the same "..\\stocks" string as before).
folder_path = os.path.join("..", "stocks")

# NOTE(review): the two lists look like source column names and the names they
# are renamed to — confirm against import_stock_universe.
dataframes = import_stock_universe(
    folder_path,
    ['Adjusted_close', 'Company Market Cap'],
    ['adj_close', 'mkt_cap'],
)

In [9]:
# Build a wide panel of market caps: one column per stock, named after the stock
mktcap_df = pd.concat(
    [frame['mkt_cap'].rename(ticker) for ticker, frame in dataframes.items()],
    axis=1,
)

# Log transform, then a 5-span exponential moving average, then back-fill
# whatever NaNs remain
mktcap_df = (
    np.log(mktcap_df)
    .ewm(span=5, adjust=False)
    .mean()
    .bfill()
)
mktcap_df

In [10]:
# Build a wide panel of simple one-period returns: one column per stock.
# The first observation of every stock is dropped because pct_change leaves it NaN.
returns_df = pd.concat(
    [
        frame['adj_close'].pct_change(1).rename(ticker).iloc[1:]
        for ticker, frame in dataframes.items()
    ],
    axis=1,
)

# Replace each stock's missing returns with that stock's own mean return
# NOTE(review): the mean is taken over the whole column, so it uses later
# observations as well — confirm this is acceptable for the analysis.
returns_df = returns_df.apply(lambda column: column.fillna(column.mean()))

# Discard any row that still contains a NaN
returns_df = returns_df.dropna()

returns_df

In [16]:
# Intersect Dates
# Every panel below is sliced with .loc[common_dates], which raises KeyError
# for any label missing from that panel. The intersection therefore has to
# span all of them, not just returns and market betas. When every panel
# already covers those dates the result is unchanged.
common_dates = returns_df.index.intersection(carhart_mkt_betas.index)
for panel in (
    mktcap_df,
    carhart_smb_betas,
    carhart_hml_betas,
    carhart_wml_betas,
    carhart_bab_betas,
):
    common_dates = common_dates.intersection(panel.index)

# Filter for common dates
mkt_cap_df = mktcap_df.loc[common_dates]
returns_df = returns_df.loc[common_dates]
carhart_mkt_betas = carhart_mkt_betas.loc[common_dates]
carhart_smb_betas = carhart_smb_betas.loc[common_dates]
carhart_hml_betas = carhart_hml_betas.loc[common_dates]
carhart_wml_betas = carhart_wml_betas.loc[common_dates]
carhart_bab_betas = carhart_bab_betas.loc[common_dates]

# Filter common stocks: keep, in the same order, the columns of the market-beta panel
mkt_cap_df = mkt_cap_df[carhart_mkt_betas.columns]
returns_df = returns_df[carhart_mkt_betas.columns]

In [17]:
# Print the (rows, columns) shape of every aligned panel, one per line
for aligned_panel in (
    mkt_cap_df,
    returns_df,
    carhart_mkt_betas,
    carhart_smb_betas,
    carhart_hml_betas,
    carhart_wml_betas,
    carhart_bab_betas,
):
    print(aligned_panel.shape)

In [18]:
# Cross-sectional step: for every date, regress the stocks' returns on their
# five factor betas with weighted least squares and keep the coefficients.

# Regressor columns, in the order they enter the design matrix
factor_columns = ['mkt', 'smb', 'hml', 'wml', 'bab']

# One Series of fitted coefficients per date
betas_list = []

# Loop over each date shared by all panels
for date in common_dates:
    # One row per stock: return, the five betas and the raw weight.
    # The weight is the square root of that date's mkt_cap_df value and lives in
    # the same frame so that dropna() removes a stock from y, X and the weights
    # together. statsmodels matches weights to observations by position, so a
    # weight vector built from the full cross-section no longer lines up with
    # the sample once any row has been dropped.
    cross_section = pd.concat(
        [
            returns_df.loc[date].rename('returns'),
            carhart_mkt_betas.loc[date].rename('mkt'),
            carhart_smb_betas.loc[date].rename('smb'),
            carhart_hml_betas.loc[date].rename('hml'),
            carhart_wml_betas.loc[date].rename('wml'),
            carhart_bab_betas.loc[date].rename('bab'),
            np.sqrt(mkt_cap_df.loc[date]).rename('weight'),
        ],
        axis=1,
    ).dropna()

    # Normalise the weights over the stocks that are actually in the regression
    weights = cross_section['weight'] / cross_section['weight'].sum()

    # Define independent (X) and dependent (y) variables
    X = sm.add_constant(cross_section[factor_columns])
    y = cross_section['returns']

    # Run the weighted least squares (WLS) regression
    fit = sm.WLS(y, X, weights=weights).fit()

    # Store the fitted coefficients (intercept and factor loadings) under this date
    betas_list.append(fit.params.rename(date))

# Convert the list of Series to a DataFrame: one row per date, one column per coefficient
history_betas_df = pd.DataFrame(betas_list)

# Set the index as the dates
history_betas_df.index = common_dates

In [20]:
# Display the per-date regression coefficients (one row per date)

history_betas_df

In [21]:
# Cumulative sum of each factor's per-date coefficient, one line per factor
factor_curves = [
    ('mkt', 'Market Beta Returns'),
    ('smb', 'SMB Beta Returns'),
    ('hml', 'HML Beta Returns'),
    ('wml', 'WML Beta Returns'),
    ('bab', 'BAB Beta Returns'),
]

fig, ax = plt.subplots(figsize=(10, 6))
for column, label in factor_curves:
    ax.plot(history_betas_df[column].cumsum(), label=label, alpha=0.7)

# Zero reference line
ax.axhline(y=0, color='black', linestyle='dashed')

# Titles, axis labels and legend
ax.set_title('Factor Returns Time Series')
ax.set_xlabel('Time')
ax.set_ylabel('Returns')
ax.legend()

# Render the figure
plt.show()

In [22]:
# Run the project's Fama-MacBeth significance test on the five factor
# coefficient series (the intercept column is left out)

tested_factors = ['mkt', 'smb', 'hml', 'wml', 'bab']
results = fama_macbeth_significance_test(history_betas_df[tested_factors])

results