# Fama-MacBeth Regression #

### Size, Value and Beta ###

In [10]:
# Import Libraries

# Data Management
import pandas as pd
import numpy as np

# Plots
import matplotlib.pyplot as plt

# Statistics
from scipy import stats
import statsmodels.api as sm

# Handle Files
import sys
import os

# Import Local Functions
sys.path.append(os.path.abspath("../source"))
from functions import import_stock_universe
from capm_toolkit import wexp
from capm_toolkit import compute_daily_returns
from capm_toolkit import compute_excess_returns
from capm_toolkit import capm_regression

In [11]:
# Load the stock universe into `dataframes`. Later cells iterate it as
# ticker -> DataFrame and read the 'adj_close' and 'mkt_cap' columns.
# The folder path is built with os.path.join so the separator matches the
# host OS (the previous hard-coded backslash only resolved on Windows).
folder_path = os.path.join("..", "stocks")

# NOTE(review): presumably the first list names the source columns to keep and
# the second list their new names — confirm against import_stock_universe.
dataframes = import_stock_universe(
    folder_path,
    ['Adjusted_close', 'Company Market Cap'],
    ['adj_close', 'mkt_cap'],
)

In [12]:
# Load the risk-free rate table and index it by its 'Date' column.
# Paths are built with os.path.join so they resolve on any OS (the previous
# hard-coded backslashes only worked on Windows).
rfr = pd.read_csv(os.path.join("..", "additional_data", "rfr.csv")).set_index('Date')
# Dates in this file are parsed day-first
rfr.index = pd.to_datetime(rfr.index, dayfirst=True)

# Load the S&P 500 table and index it by its 'Date' column.
sp500 = pd.read_csv(os.path.join("..", "additional_data", "sp500.csv")).set_index('Date')
# NOTE(review): unlike the risk-free file, these dates are parsed with the
# pandas default (not day-first) — confirm this matches the file's format.
sp500.index = pd.to_datetime(sp500.index)

In [13]:
# Load the factor-beta table used as regressors in the cross-sectional
# regressions (its 'mkt_beta', 'smb_beta' and 'hml_beta' columns are read later).
# The path is built with os.path.join so it resolves on any OS (the previous
# hard-coded backslashes only worked on Windows).
ff_betas = pd.read_csv(os.path.join("..", "additional_data", "famafrench_betas.csv"))

# pandas labels a header-less first column 'Unnamed: 0'; rename it and use it
# as the index so rows can be matched to tickers
ff_betas = ff_betas.rename(columns={'Unnamed: 0': 'ticker'}).set_index('ticker')

ff_betas

In [15]:
# One back-filled market-cap series per ticker
mkt_cap_dict = {
    ticker: df['mkt_cap'].bfill()
    for ticker, df in dataframes.items()
}

# Assemble the series into a single frame (one column per ticker) and
# back-fill once more to cover gaps that appear after the series are aligned
mkt_cap_df = pd.DataFrame.from_dict(mkt_cap_dict).bfill()

mkt_cap_df

In [16]:
# One-period percentage returns of each ticker's adjusted close
returns_dic = {
    ticker: df['adj_close'].pct_change(1)
    for ticker, df in dataframes.items()
}

returns_df = pd.DataFrame.from_dict(returns_dic)

# Fill every missing return with the mean return of the same column (ticker)
returns_df = returns_df.fillna(returns_df.mean())

returns_df

In [20]:
# Dates that appear in both the returns and the market-cap frames
common_dates = returns_df.index.intersection(mkt_cap_df.index)

# Keep only those dates (rows) and only the tickers listed in ff_betas
# (columns), in ff_betas' order
mkt_cap_df = mkt_cap_df.loc[common_dates, ff_betas.index]
returns_df = returns_df.loc[common_dates, ff_betas.index]

In [21]:
# Print the shape of each aligned frame on its own line
print(mkt_cap_df.shape, returns_df.shape, sep='\n')

In [22]:
# Cross-sectional pass: for every date, regress that date's stock returns on
# the three factor betas with weighted least squares and keep the fitted
# coefficients. One Series of coefficients is collected per date.
betas_list = []

# Loop over each available date in the returns DataFrame
for date in common_dates:
    # This date's returns as a column named 'returns', indexed by ticker
    rets = returns_df.loc[date].rename('returns')

    # Join returns with the betas on ticker; tickers with any missing value
    # are left out of this date's regression
    reg_df_data = pd.concat([rets, ff_betas], axis=1).dropna()

    # Square-root market-cap weights, restricted to the tickers that survived
    # the dropna above so they line up row-by-row with X and y. (Building them
    # from the full ticker list would leave the weights mis-sized or
    # mis-aligned whenever a row is dropped.)
    weights = np.sqrt(mkt_cap_df.loc[date]).reindex(reg_df_data.index).dropna()
    reg_df_data = reg_df_data.loc[weights.index]

    # Normalise over the tickers actually used so the weights sum to one
    weights = weights / weights.sum()

    # Define independent (X) and dependent (y) variables
    X = sm.add_constant(reg_df_data[['mkt_beta', 'smb_beta', 'hml_beta']])
    y = reg_df_data['returns']

    # Run the weighted least squares (WLS) regression
    model = sm.WLS(y, X, missing='drop', weights=weights)
    results = model.fit()

    # Only the fitted coefficients are kept
    params = results.params

    # Name the Series after the date so each row of the final frame is a date
    betas_list.append(pd.Series(params, name=date))

# One row per date, one column per coefficient
history_betas_df = pd.DataFrame(betas_list)

# Set the index as the dates
history_betas_df.index = common_dates


In [23]:
# Display the per-date regression coefficients collected in history_betas_df
# (a bare expression, so it only renders as notebook cell output)

history_betas_df

In [24]:
# Plot the running sum of each factor coefficient over time on one figure
plt.figure(figsize=(10, 6))
for column, label in (
    ('mkt_beta', 'Market Beta Returns'),
    ('smb_beta', 'SMB Beta Returns'),
    ('hml_beta', 'HML Beta Returns'),
):
    plt.plot(history_betas_df[column].cumsum(), label=label, alpha=0.7)

# Title, axis labels and legend
plt.title('Factor Returns Time Series')
plt.xlabel('Time')
plt.ylabel('Returns')
plt.legend()

# Render the figure
plt.show()

In [26]:
# Load the SMB premium table and index it by its parsed 'Date' column.
# Paths are built with os.path.join so they resolve on any OS (the previous
# hard-coded backslashes only worked on Windows).
SMB = pd.read_csv(os.path.join("..", "additional_data", "famafrench_smb.csv")).set_index('Date')
SMB.index = pd.to_datetime(SMB.index)

# Load the HML premium table and index it by its parsed 'Date' column
HML = pd.read_csv(os.path.join("..", "additional_data", "famafrench_hml.csv")).set_index('Date')
HML.index = pd.to_datetime(HML.index)

In [28]:
# Series derived with the project's CAPM helpers.
# NOTE(review): daily_rfr is not used anywhere else in this cell — confirm
# whether a later cell depends on it.
daily_rfr = compute_daily_returns(rfr['risk_free_rate'])
market_excess_returns = compute_excess_returns(sp500['sp_500'], rfr['risk_free_rate'])

# Overlay the cumulative regression market coefficient on the cumulative
# market excess returns
plt.figure(figsize=(10, 6))
for series, label in (
    (history_betas_df['mkt_beta'], 'Regression Market Beta Returns'),
    (market_excess_returns, 'Calculated Market Beta Returns'),
):
    plt.plot(series.cumsum(), label=label, alpha=0.7)

# Title, axis labels and legend
plt.title('Returns Time Series')
plt.xlabel('Time')
plt.ylabel('Returns')
plt.legend()

# Render the figure
plt.show()

In [29]:
# Overlay the cumulative regression SMB coefficient on the cumulative SMB
# premium loaded from file
plt.figure(figsize=(10, 6))
for series, label in (
    (history_betas_df['smb_beta'], 'Regression SMB Beta Returns'),
    (SMB, 'Calculated SMB Beta Returns'),
):
    plt.plot(series.cumsum(), label=label, alpha=0.7)

# Title, axis labels and legend
plt.title('Returns Time Series')
plt.xlabel('Time')
plt.ylabel('Returns')
plt.legend()

# Render the figure
plt.show()

In [30]:
# Overlay the cumulative regression HML coefficient on the cumulative HML
# premium loaded from file
plt.figure(figsize=(10, 6))
for series, label in (
    (history_betas_df['hml_beta'], 'Regression HML Beta Returns'),
    (HML, 'Calculated HML Beta Returns'),
):
    plt.plot(series.cumsum(), label=label, alpha=0.7)

# Title, axis labels and legend
plt.title('Returns Time Series')
plt.xlabel('Time')
plt.ylabel('Returns')
plt.legend()

# Render the figure
plt.show()

In [31]:
# Let's test the significance of these coefficients
def newey_west_std(errors, lag=4):
    """
    Computes the Newey-West (Bartlett-kernel) standard error of a series' mean.

    Parameters:
    errors: Pandas Series or NumPy array holding the time series (gamma
            estimates). Both input types give the same result. Missing values
            are removed first and the remaining observations are treated as
            consecutive.
    lag: Maximum number of lags to consider (default: 4). Lags at or beyond
         the sample length have no observation pairs and contribute nothing.

    Returns:
    Newey-West adjusted standard error of the mean, or NaN when fewer than
    two observations are available.
    """
    values = np.asarray(errors, dtype=float)
    values = values[~np.isnan(values)]
    T = len(values)
    if T < 2:
        return float('nan')

    # Demean once with the full-sample mean and divide every lag by the same
    # T - 1. With Bartlett weights this keeps the long-run variance
    # non-negative, which per-lag sub-sample covariances do not guarantee.
    deviations = values - values.mean()
    divisor = T - 1

    # Lag-0 term: the usual sample variance (ddof=1)
    long_run_var = (deviations @ deviations) / divisor

    for l in range(1, min(lag, T - 1) + 1):
        weight = 1 - (l / (lag + 1))  # Bartlett weight, linearly decaying in l
        autocov = (deviations[l:] @ deviations[:-l]) / divisor  # Autocovariance at lag l
        long_run_var += 2 * weight * autocov  # Newey-West adjustment

    # Guard against a tiny negative value produced by floating-point rounding
    long_run_var = max(long_run_var, 0.0)

    return np.sqrt(long_run_var / T)  # Standard error of the mean

def fama_macbeth_significance_test(gamma_series, lag=4):
    """
    Performs statistical significance tests for Fama-MacBeth risk premia.

    Parameters:
    gamma_series: DataFrame where each column contains estimated gammas over time.
    lag: Lags for Newey-West standard errors (default: 4).

    Returns:
    DataFrame indexed by the columns of gamma_series, with the columns
    'Mean Gamma', 'Std Error', 't-stat' and 'p-value' (two-sided, Student-t
    with len(gamma_series) - 1 degrees of freedom).
    """
    # Time-series average of each gamma column
    gamma_means = gamma_series.mean()

    # Newey-West adjusted standard error of each column's mean
    gamma_std = gamma_series.apply(newey_west_std, lag=lag)

    # t-statistic of each mean against zero
    t_stats = gamma_means / gamma_std

    # Two-sided p-values. The survival function is used instead of 1 - cdf,
    # which rounds to exactly 0 once |t| is large enough for cdf to reach 1.0.
    dof = len(gamma_series) - 1
    p_values = pd.Series(2 * stats.t.sf(t_stats.abs(), df=dof), index=t_stats.index)

    # Create results DataFrame
    results = pd.DataFrame({
        'Mean Gamma': gamma_means,
        'Std Error': gamma_std,
        't-stat': t_stats,
        'p-value': p_values
    })

    return results


In [33]:
# Significance table for the three factor coefficients, using the default lag
results = fama_macbeth_significance_test(
    history_betas_df[['mkt_beta', 'smb_beta', 'hml_beta']]
)

results