# Fama and French Factor Model #

### The Model Specification ###

In [1]:
# Import Libraries

# Data Management
import pandas as pd
import numpy as np

# Plots
import matplotlib.pyplot as plt

# Statistics
import statsmodels.api as sm
from statsmodels.stats.outliers_influence import variance_inflation_factor
from scipy.stats import norm

# Handle Files
import sys
import os

# Import Local Functions
sys.path.append(os.path.abspath("../source"))
from data_downloader import get_market_data
from capm_toolkit import wexp
from capm_toolkit import capm_regression

In [2]:
# Load the factor premiums (the file can be downloaded from the Fama and French website).
# The path is assembled with os.path.join so the notebook also runs on Linux/macOS,
# where a literal backslash is not a directory separator.
premiums_path = os.path.join('..', 'additional_data', 'famafrench_premiums.csv')

premiums_df = pd.read_csv(premiums_path).set_index('Date')
premiums_df.index = pd.to_datetime(premiums_df.index)
premiums_df.columns = ['mkt_premium', 'smb_premium', 'hml_premium', 'risk_free_rate']

# Rescale every column by 1/100.
# NOTE(review): presumably the file quotes values in percent - confirm against the source file.
premiums_df = premiums_df.div(100)

premiums_df

In [3]:
# Cumulative sum of the three factor premiums (the risk-free rate is left out)
factor_columns = ['mkt_premium', 'smb_premium', 'hml_premium']
cumulative_premiums = premiums_df[factor_columns].cumsum()

# One line per factor, labelled with its column name
plt.figure(figsize=(10, 6))
plt.plot(cumulative_premiums, label=cumulative_premiums.columns, alpha=1)

# Titles, axis labels, legend and grid
plt.title('Cumulative Premiums Time Series')
plt.xlabel('Time')
plt.ylabel('Returns')
plt.legend()
plt.grid()

# Render the figure
plt.show()

In [4]:
# Download the market data for a single stock through the project helper
ticker = 'MSFT'

# Arguments forwarded to get_market_data; later cells read data_stock['returns']
# and data_stock.index from the result.
download_kwargs = {
    'ticker': ticker,
    'start_date': '2015-01-01',
    'end_date': '2025-01-01',
    'returns': True,
}
data_stock = get_market_data(**download_kwargs)

data_stock

In [5]:
# Assemble the regression inputs on the stock's dates:
# the stock's excess return over the risk-free rate plus the three factor premiums.
# Every assigned series is aligned to data_stock.index; rows with any missing
# value are removed at the end.
regression_df = (
    pd.DataFrame(index=data_stock.index)
    .assign(
        stock_premium=data_stock['returns'] - premiums_df['risk_free_rate'],
        mkt_premium=premiums_df['mkt_premium'],
        smb_premium=premiums_df['smb_premium'],
        hml_premium=premiums_df['hml_premium'],
    )
    .dropna()
)

regression_df

In [6]:
# Dependent variable: the stock's excess return
y = regression_df.loc[:, 'stock_premium']

# Explanatory variables: the three factor premiums
x = regression_df.loc[:, ['mkt_premium', 'smb_premium', 'hml_premium']]

In [7]:
# Pairwise correlation between the factor premiums (Pearson is the pandas default)
x.corr(method='pearson')

In [8]:
# Use the full sample length as the window argument
window = len(y)

# Fit the model through the project helper.
# NOTE(review): the meaning of the fourth positional argument (True) is defined
# in capm_toolkit.capm_regression - confirm there.
results = capm_regression(y, x, window, True)

# Print the text summary of the fitted model
print(results.summary())

In [9]:
# Let us create a reusable function for the three-factor regression
def FamaFrenchFactors(
    stock_returns: pd.Series,
    market_returns: pd.Series,
    small_minus_big_series: pd.Series,
    high_minus_low_series: pd.Series,
) -> dict:
    """Estimate three-factor loadings for one stock with a weighted least-squares fit.

    The four series are restricted to the index labels they share, rows with a
    missing value in any series are discarded, and the stock series is regressed
    on an intercept plus the three factor series. Observation weights are
    ``T * wexp(T, T / 2)``, where ``T`` is the number of rows actually used in
    the fit.

    NOTE(review): ``wexp`` comes from capm_toolkit; it is presumed to return
    ``T`` exponentially decaying weights - confirm there.

    Parameters
    ----------
    stock_returns : pd.Series
        Dependent variable (the callers in this notebook pass excess returns).
    market_returns : pd.Series
        Market factor series.
    small_minus_big_series : pd.Series
        SMB factor series.
    high_minus_low_series : pd.Series
        HML factor series.

    Returns
    -------
    dict
        Keys ``'alpha'``, ``'mkt_beta'``, ``'smb_beta'`` and ``'hml_beta'``
        holding the fitted intercept and the three slope coefficients.

    Raises
    ------
    ValueError
        If no row remains after aligning the series and dropping missing values.
    """
    # Align time series to the same date range
    common_index = (
        stock_returns.index
        .intersection(market_returns.index)
        .intersection(small_minus_big_series.index)
        .intersection(high_minus_low_series.index)
    )

    # Fixed column names keep the coefficient lookup below independent of the
    # names carried by the incoming series.
    sample = pd.concat(
        [
            stock_returns.loc[common_index],
            market_returns.loc[common_index],
            small_minus_big_series.loc[common_index],
            high_minus_low_series.loc[common_index],
        ],
        axis=1,
        keys=['stock', 'mkt', 'smb', 'hml'],
    )

    # Drop incomplete rows BEFORE sizing the weights, so that T (and therefore
    # the T / 2 argument given to wexp) reflects the observations that enter
    # the fit rather than rows that would be discarded afterwards.
    sample = sample.dropna()
    if sample.empty:
        raise ValueError('No overlapping, non-missing observations to fit the factor model.')

    y = sample['stock']
    X = sm.add_constant(sample[['mkt', 'smb', 'hml']])

    # Create weights with exponential decay
    T = len(y)
    weights = T * wexp(T, T / 2)

    # Fit WLS regression
    model = sm.WLS(y, X, weights=weights).fit()

    # Look the coefficients up by label; add_constant names the intercept 'const'
    params = model.params

    return {
        'alpha': params['const'],
        'mkt_beta': params['mkt'],
        'smb_beta': params['smb'],
        'hml_beta': params['hml'],
    }

In [10]:
# Sanity check: run the function on the single-stock regression frame
parameters = FamaFrenchFactors(
    stock_returns=regression_df['stock_premium'],
    market_returns=regression_df['mkt_premium'],
    small_minus_big_series=regression_df['smb_premium'],
    high_minus_low_series=regression_df['hml_premium'],
)

parameters

### Getting the Coefficients for all the Stocks ###

In [11]:
# Load the returns of all stocks (one column per ticker).
# The path is assembled with os.path.join so the notebook also runs on Linux/macOS,
# where a literal backslash is not a directory separator.
returns_path = os.path.join('..', 'additional_data', 'stocks_returns.csv')

# The date column is stored without a header, so pandas names it 'Unnamed: 0';
# give it a proper name before using it as the index.
returns_df = (
    pd.read_csv(returns_path)
    .rename(columns={'Unnamed: 0': 'Date'})
    .set_index('Date')
)
returns_df.index = pd.to_datetime(returns_df.index)

returns_df

In [12]:
# Accumulators for the per-stock factor loadings
tickers, betas_capm, betas_smb, betas_hml = [], [], [], []

# Fit the three-factor model once per column of returns_df
for ticker in returns_df.columns:
    # Excess return of the stock over the risk-free rate
    stock_excess_returns = returns_df[ticker].sub(premiums_df['risk_free_rate'])

    parameters = FamaFrenchFactors(
        stock_returns=stock_excess_returns,
        market_returns=premiums_df['mkt_premium'],
        small_minus_big_series=premiums_df['smb_premium'],
        high_minus_low_series=premiums_df['hml_premium'],
    )

    # Keep the ticker and its three betas (the intercept is not stored)
    tickers.append(ticker)
    betas_capm.append(parameters['mkt_beta'])
    betas_smb.append(parameters['smb_beta'])
    betas_hml.append(parameters['hml_beta'])

    print(f'{ticker} is ready.')

In [13]:
# Collect the estimated betas in one table, one row per ticker
beta_columns = {
    'mkt_beta': betas_capm,
    'smb_beta': betas_smb,
    'hml_beta': betas_hml,
}
ff_betas_df = pd.DataFrame(beta_columns, index=tickers)

ff_betas_df

In [14]:
# Summary statistics of the market betas
mkt_betas = ff_betas_df['mkt_beta']
mu = mkt_betas.mean()
sigma = mkt_betas.std()
median = mkt_betas.median()

# Create Histogram
plt.figure(figsize=(10, 6))
plt.hist(mkt_betas, bins=30, density=True, color='lightskyblue', alpha=0.5, edgecolor='black', label='Market Betas Distribution')

# Normal density with the same mean and standard deviation, evaluated over the
# observed range. Dedicated names are used so the regression inputs `x` and `y`
# defined earlier in the notebook are not overwritten.
beta_grid = np.linspace(mkt_betas.min(), mkt_betas.max(), 100)
normal_pdf = norm.pdf(beta_grid, mu, sigma)

# Overlay the fitted normal curve
plt.plot(beta_grid, normal_pdf, color='black', linestyle='solid', linewidth=2, label='Normal Distribution')

# Reference lines: mean, median and one standard deviation either side of the mean
plt.axvline(x=mu, color='black', linestyle='dashed', label='Mean Beta')
plt.axvline(x=median, color='red', linestyle='dashed', label='Median Beta')
plt.axvline(x=mu + sigma, color='grey', linestyle='dashed')
plt.axvline(x=mu - sigma, color='grey', linestyle='dashed')

# Config (the horizontal axis shows betas, not returns)
plt.title('Market Betas Histogram with Normal Distribution')
plt.xlabel('Beta')
plt.ylabel('Density')

# Legends and Grid
plt.legend()
plt.grid(True)

# Show
plt.show()

In [15]:
# Summary statistics of the SMB betas
smb_betas = ff_betas_df['smb_beta']
mu = smb_betas.mean()
sigma = smb_betas.std()
median = smb_betas.median()

# Create Histogram
plt.figure(figsize=(10, 6))
plt.hist(smb_betas, bins=30, density=True, color='lightgreen', alpha=0.5, edgecolor='black', label='SMB Betas Distribution')

# Normal density with the same mean and standard deviation, evaluated over the
# observed range. Dedicated names are used so the regression inputs `x` and `y`
# defined earlier in the notebook are not overwritten.
beta_grid = np.linspace(smb_betas.min(), smb_betas.max(), 100)
normal_pdf = norm.pdf(beta_grid, mu, sigma)

# Overlay the fitted normal curve
plt.plot(beta_grid, normal_pdf, color='black', linestyle='solid', linewidth=2, label='Normal Distribution')

# Reference lines: mean, median and one standard deviation either side of the mean
plt.axvline(x=mu, color='black', linestyle='dashed', label='Mean Beta')
plt.axvline(x=median, color='red', linestyle='dashed', label='Median Beta')
plt.axvline(x=mu + sigma, color='grey', linestyle='dashed')
plt.axvline(x=mu - sigma, color='grey', linestyle='dashed')

# Config (the horizontal axis shows betas, not returns)
plt.title('SMB Betas Histogram with Normal Distribution')
plt.xlabel('Beta')
plt.ylabel('Density')

# Legends and Grid
plt.legend()
plt.grid(True)

# Show
plt.show()

In [16]:
# Summary statistics of the HML betas
hml_betas = ff_betas_df['hml_beta']
mu = hml_betas.mean()
sigma = hml_betas.std()
median = hml_betas.median()

# Create Histogram
plt.figure(figsize=(10, 6))
plt.hist(hml_betas, bins=30, density=True, color='salmon', alpha=0.5, edgecolor='black', label='HML Betas Distribution')

# Normal density with the same mean and standard deviation, evaluated over the
# observed range. Dedicated names are used so the regression inputs `x` and `y`
# defined earlier in the notebook are not overwritten.
beta_grid = np.linspace(hml_betas.min(), hml_betas.max(), 100)
normal_pdf = norm.pdf(beta_grid, mu, sigma)

# Overlay the fitted normal curve
plt.plot(beta_grid, normal_pdf, color='black', linestyle='solid', linewidth=2, label='Normal Distribution')

# Reference lines: mean, median and one standard deviation either side of the mean
plt.axvline(x=mu, color='black', linestyle='dashed', label='Mean Beta')
plt.axvline(x=median, color='red', linestyle='dashed', label='Median Beta')
plt.axvline(x=mu + sigma, color='grey', linestyle='dashed')
plt.axvline(x=mu - sigma, color='grey', linestyle='dashed')

# Config (the horizontal axis shows betas, not returns)
plt.title('HML Betas Histogram with Normal Distribution')
plt.xlabel('Beta')
plt.ylabel('Density')

# Legends and Grid
plt.legend()
plt.grid(True)

# Show
plt.show()

In [17]:
# Design matrix for the VIF calculation:
# the three factor premiums, complete rows only, under short column names.
factor_renames = {
    'mkt_premium': 'mkt',
    'smb_premium': 'smb',
    'hml_premium': 'hml',
}

X = (
    regression_df[list(factor_renames)]
    .dropna()
    .rename(columns=factor_renames)
)

X

In [18]:
# Variance inflation factor of each factor.
# variance_inflation_factor regresses column i on the other columns exactly as
# given and does not add an intercept of its own, so a constant column is
# included explicitly; without it the auxiliary R-squared (and hence the VIF)
# is the uncentred one.
exog_with_const = sm.add_constant(X, has_constant='add')

vif_data = pd.DataFrame()
vif_data['vars'] = X.columns
# Position 0 is the constant, so the factors occupy positions 1..k
vif_data['VIF'] = [
    variance_inflation_factor(exog_with_const.values, i)
    for i in range(1, exog_with_const.shape[1])
]

vif_data

In [19]:
# Back out the auxiliary R-squared of each factor from its VIF: R^2 = 1 - 1 / VIF
r_squared_df = pd.DataFrame(
    {
        'vars': X.columns,
        'r_squared': 1 - (1 / vif_data['VIF']),
    }
)

r_squared_df

In [68]:
# Save the estimated betas next to the other data files.
# The path is assembled with os.path.join so the notebook also runs on Linux/macOS,
# where a literal backslash is not a directory separator.
ff_betas_df.to_csv(os.path.join('..', 'additional_data', 'famafrench_betas.csv'))