# Fama-MacBeth Regression #

### Size, Value and Beta ###

In [69]:
# Import Libraries

# Data Management
import pandas as pd
import numpy as np

# Plots
import matplotlib.pyplot as plt

# Statistics
from scipy import stats
import statsmodels.api as sm

In [70]:
# Load the stock returns. The column pandas labels 'Unnamed: 0' is renamed to
# 'Date' and used as the index.
# Forward slashes keep the relative path valid on Windows, Linux and macOS
# (a backslash-separated path only resolves on Windows).
returns_df = (
    pd.read_csv('../additional_data/stocks_returns.csv')
    .rename(columns={'Unnamed: 0': 'Date'})
    .set_index('Date')
)

# Parse the index labels into timestamps so they can be matched against the
# premiums index later on
returns_df.index = pd.to_datetime(returns_df.index)

returns_df

In [71]:
# The premiums can be downloaded from the Fama and French website.
# Forward slashes keep the relative path valid on Windows, Linux and macOS
# (a backslash-separated path only resolves on Windows).
premiums_df = pd.read_csv('../additional_data/famafrench_premiums.csv').set_index('Date')
premiums_df.index = pd.to_datetime(premiums_df.index)

# Columns are renamed by position: this assumes the CSV holds exactly these
# four series in this order (TODO confirm against the file).
premiums_df.columns = ['mkt_premium', 'smb_premium', 'hml_premium', 'risk_free_rate']

# Presumably the file quotes values in percent; dividing by 100 would put them
# in decimal form (verify against the returns data).
premiums_df = premiums_df / 100

premiums_df

In [72]:
# Load the betas used as regressors. The column pandas labels 'Unnamed: 0' is
# renamed to 'ticker' and used as the index.
# Forward slashes keep the relative path valid on Windows, Linux and macOS
# (a backslash-separated path only resolves on Windows).
ff_betas = (
    pd.read_csv('../additional_data/famafrench_betas.csv')
    .rename(columns={'Unnamed: 0': 'ticker'})
    .set_index('ticker')
)

ff_betas

In [73]:
# Dates present in both the returns index and the premiums index
common_dates = returns_df.index.intersection(premiums_df.index)

# Restrict both DataFrames to those shared dates
returns_df, premiums_df = returns_df.loc[common_dates], premiums_df.loc[common_dates]

In [76]:
# Print the (rows, columns) shape of the premiums and then the returns
for frame in (premiums_df, returns_df):
    print(frame.shape)

In [77]:
# One Series of cross-sectional regression coefficients is collected per date
betas_list = []

# The regressors are the same three beta columns on every date
beta_columns = ['mkt_beta', 'smb_beta', 'hml_beta']

# Loop over each date shared by the returns and premiums DataFrames
for date in common_dates:

    # Cross-section of returns on this date, as a Series named 'returns'
    rets = returns_df.loc[date].rename('returns')

    # Align the returns with the betas on their index and keep only rows
    # where every column is available
    reg_df_data = pd.concat([rets, ff_betas], axis=1).dropna()

    # Define independent (X, with an intercept column) and dependent (y) variables
    X = sm.add_constant(reg_df_data[beta_columns])
    y = reg_df_data['returns']

    # Run an ordinary least squares (OLS) regression; incomplete rows were
    # already removed by dropna() above, so no extra missing-data handling
    # is needed here
    model = sm.OLS(y, X)
    results = model.fit()

    # Keep only the estimated coefficients (intercept plus one per beta)
    params = results.params

    # Label the coefficients with the date they were estimated on
    betas_list.append(params.rename(date))

# Stack the per-date coefficient Series into one DataFrame (one row per date)
history_betas_df = pd.DataFrame(betas_list)

# Set the index as the dates
history_betas_df.index = common_dates

In [78]:
# Display the per-date regression coefficients (one row per date)
history_betas_df

In [79]:
# Coefficient column -> legend label, in plotting order
factor_labels = {
    'mkt_beta': 'Market Beta Returns',
    'smb_beta': 'SMB Beta Returns',
    'hml_beta': 'HML Beta Returns',
}

# Plot the running sum of each per-date regression coefficient
plt.figure(figsize=(10, 6))
for column, label in factor_labels.items():
    plt.plot(history_betas_df[column].cumsum(), label=label, alpha=0.7)

# Titles, axis labels and legend
plt.title('Factor Returns Time Series')
plt.xlabel('Time')
plt.ylabel('Returns')
plt.legend()

# Render the figure
plt.show()

In [80]:
# Running sums of the regression market coefficient and of the market premium
regression_cumulative = history_betas_df['mkt_beta'].cumsum()
premium_cumulative = premiums_df['mkt_premium'].cumsum()

# Draw both series on the same axes for comparison
plt.figure(figsize=(10, 6))
plt.plot(regression_cumulative, label='Regression Market Beta Returns', alpha=0.7)
plt.plot(premium_cumulative, label='Calculated Market Beta Returns', alpha=0.7)

# Titles, axis labels and legend
plt.title('Returns Time Series')
plt.xlabel('Time')
plt.ylabel('Returns')
plt.legend()

# Render the figure
plt.show()

In [81]:
# Running sums of the regression SMB coefficient and of the SMB premium
regression_cumulative = history_betas_df['smb_beta'].cumsum()
premium_cumulative = premiums_df['smb_premium'].cumsum()

# Draw both series on the same axes for comparison
plt.figure(figsize=(10, 6))
plt.plot(regression_cumulative, label='Regression SMB Beta Returns', alpha=0.7)
plt.plot(premium_cumulative, label='Calculated SMB Beta Returns', alpha=0.7)

# Titles, axis labels and legend
plt.title('Returns Time Series')
plt.xlabel('Time')
plt.ylabel('Returns')
plt.legend()

# Render the figure
plt.show()

In [82]:
# Running sums of the regression HML coefficient and of the HML premium
regression_cumulative = history_betas_df['hml_beta'].cumsum()
premium_cumulative = premiums_df['hml_premium'].cumsum()

# Draw both series on the same axes for comparison
plt.figure(figsize=(10, 6))
plt.plot(regression_cumulative, label='Regression HML Beta Returns', alpha=0.7)
plt.plot(premium_cumulative, label='Calculated HML Beta Returns', alpha=0.7)

# Titles, axis labels and legend
plt.title('Returns Time Series')
plt.xlabel('Time')
plt.ylabel('Returns')
plt.legend()

# Render the figure
plt.show()

In [83]:
# Let's test the significance of these coefficients
def newey_west_std(errors, lag=4):
    """
    Computes the Newey-West standard error of the mean of a time series.

    The sample variance is augmented with Bartlett-weighted autocovariances
    up to `lag`, and the result is divided by the number of observations
    before taking the square root.

    Parameters:
    errors: One-dimensional Pandas Series, NumPy array or sequence of numbers
            (e.g. gamma estimates over time). Missing values are not removed:
            a NaN in the input propagates to the result.
    lag: Maximum number of lags to consider (default: 4). With lag=0 the
         result is the plain standard error of the mean.

    Returns:
    Newey-West adjusted standard error (float)
    """
    # Work on a plain float array so that every input type follows the same
    # code path. Previously `errors.var()` used ddof=1 for a Series but ddof=0
    # for an ndarray, giving different answers for identical data.
    values = np.asarray(errors, dtype=float)
    T = len(values)

    # Start with the sample variance (ddof=1, matching np.cov below)
    gamma_var = np.var(values, ddof=1)

    for k in range(1, lag + 1):
        # Bartlett kernel: weights decline linearly with the lag
        weight = 1 - (k / (lag + 1))
        # Sample covariance between the series and itself shifted by k
        autocov = np.cov(values[:-k], values[k:])[0, 1]
        gamma_var += 2 * weight * autocov  # Newey-West adjustment

    return np.sqrt(gamma_var / T)  # Standard error

def fama_macbeth_significance_test(gamma_series, lag=4):
    """
    Performs statistical significance tests for Fama-MacBeth risk premia.

    For each column, the time-series mean is divided by its Newey-West
    standard error, and a two-sided p-value is taken from a Student-t
    distribution with len(gamma_series) - 1 degrees of freedom.

    Parameters:
    gamma_series: DataFrame where each column contains estimated gammas over time.
    lag: Lags for Newey-West standard errors (default: 4).

    Returns:
    DataFrame indexed by the columns of `gamma_series`, with columns
    'Mean Gamma', 'Std Error', 't-stat' and 'p-value'.
    """
    gamma_means = gamma_series.mean()

    # Compute Newey-West adjusted standard errors, column by column
    gamma_std = gamma_series.apply(newey_west_std, lag=lag)

    # Compute t-statistics
    t_stats = gamma_means / gamma_std

    # Compute two-sided p-values with the survival function. `1 - cdf` loses
    # precision to cancellation and collapses to exactly 0.0 for large |t|,
    # whereas `sf` stays accurate in the tail.
    p_values = 2 * stats.t.sf(np.abs(t_stats), df=len(gamma_series) - 1)

    # Create results DataFrame
    results = pd.DataFrame({
        'Mean Gamma': gamma_means,
        'Std Error': gamma_std,
        't-stat': t_stats,
        'p-value': p_values
    })

    return results


In [84]:
# Run the significance test on the three factor coefficient columns
# (the intercept column is left out)
factor_columns = ['mkt_beta', 'smb_beta', 'hml_beta']
results = fama_macbeth_significance_test(history_betas_df[factor_columns])

results