# Fama and French Factor Model #

### The Model Specification ###

In [1]:
# Import Libraries

# Data Management
import pandas as pd
import numpy as np

# Plots
import matplotlib.pyplot as plt

# Statistics
import statsmodels.api as sm
from statsmodels.stats.outliers_influence import variance_inflation_factor
from scipy.stats import norm

# Handle Files
import sys
import os

# Import Local Functions
sys.path.append(os.path.abspath("../source"))
from functions import import_financial_data
from functions import import_stock_universe
from capm_toolkit import wexp
from capm_toolkit import compute_daily_returns
from capm_toolkit import compute_excess_returns
from capm_toolkit import capm_regression

In [2]:
# Load one stock's price history and derive its daily simple returns

ticker = 'MSFT'
stock_price = import_financial_data(ticker)

# One-period percentage change of the adjusted close; the first observation
# has no previous price, so its NaN return is dropped
stock_returns = (
    stock_price['adj_close']
    .pct_change(periods=1)
    .dropna()
)

stock_returns

In [3]:
# Load the risk-free rate, the S&P 500 and the two Fama-French premiums.
# Paths are assembled with os.path.join so they resolve on every platform:
# a literal backslash is only a path separator on Windows.
data_dir = os.path.join("..", "additional_data")

# Get the important data for the Risk Free Rate
# NOTE(review): this is the only file parsed with dayfirst=True -- confirm
# that the other CSV files really use a different date format.
rfr = pd.read_csv(os.path.join(data_dir, "rfr.csv"))
rfr = rfr.set_index('Date')
rfr.index = pd.to_datetime(rfr.index, dayfirst=True)

# Get the important data for the S&P500
sp500 = pd.read_csv(os.path.join(data_dir, "sp500.csv"))
sp500 = sp500.set_index('Date')
sp500.index = pd.to_datetime(sp500.index)

# Get the data for the SMB Premium
SMB = pd.read_csv(os.path.join(data_dir, "famafrench_smb.csv"))
SMB = SMB.set_index('Date')
SMB.index = pd.to_datetime(SMB.index)

# Get the data for the HML Premium
HML = pd.read_csv(os.path.join(data_dir, "famafrench_hml.csv"))
HML = HML.set_index('Date')
HML.index = pd.to_datetime(HML.index)

In [4]:
# Build the return series consumed by the regressions below
risk_free_rate = rfr['risk_free_rate']

# Risk-free rate transformed by the project helper compute_daily_returns
daily_rfr = compute_daily_returns(risk_free_rate)

# Market excess returns from the 'sp_500' column and the risk-free rate
market_excess_returns = compute_excess_returns(sp500['sp_500'], risk_free_rate)

# Stock returns in excess of the daily risk-free rate
stock_excess_returns = stock_returns - daily_rfr

In [6]:
# Assemble the regression dataset on the dates of the stock's excess returns
regression_df = pd.DataFrame(index=stock_excess_returns.index)

# Columns are attached in this order; each assignment aligns on the index
regression_inputs = {
    'stock': stock_excess_returns,
    'MKT': market_excess_returns,
    'SMB': SMB,
    'HML': HML,
}
for column_name, column_values in regression_inputs.items():
    regression_df[column_name] = column_values

# Keep only the dates on which every column has a value
regression_df = regression_df.dropna()

regression_df

In [7]:
# Dependent variable: the stock's excess returns
y = regression_df.loc[:, 'stock']

# Regressors: the market, SMB and HML columns
factor_names = ['MKT', 'SMB', 'HML']
x = regression_df.loc[:, factor_names]

In [8]:
# Pairwise Pearson correlations between the three regressors
x.corr(method='pearson')

In [9]:
# Use every available observation as the estimation window
window = y.shape[0]

# Fit the model through the project helper.
# NOTE(review): the meaning of the trailing positional True is defined in
# capm_toolkit.capm_regression, which is not visible in this notebook.
results = capm_regression(y, x, window, True)

# Print the regression summary
print(results.summary())

In [10]:
# Estimate the Fama-French three-factor loadings of a single stock
def FamaFrenchFactors(
    stock_returns: pd.Series,
    market_returns: pd.Series,
    small_minus_big_series: pd.Series,
    high_minus_low_series: pd.Series,
) -> dict:
    """Fit a weighted least-squares three-factor regression.

    The four inputs are restricted to the dates they have in common, the
    three factors are placed side by side as regressors (market, SMB, HML,
    in that order), an intercept is added and the model is fitted with
    ``sm.WLS``.

    Parameters
    ----------
    stock_returns
        Dependent variable, indexed by date.
    market_returns
        Market factor, indexed by date.
    small_minus_big_series
        SMB factor, indexed by date.
    high_minus_low_series
        HML factor, indexed by date.

    Each factor is expected to contribute exactly one regressor column
    (a Series or a single-column DataFrame); the coefficients are read by
    position, so extra columns would shift them.

    Returns
    -------
    dict
        Keys ``'alpha'``, ``'capm_beta'``, ``'smb_beta'`` and ``'hml_beta'``
        holding the intercept and the three factor loadings.

    Raises
    ------
    ValueError
        If the four inputs do not share a single date.
    """
    # Align time series to the same date range
    common_index = (
        stock_returns.index
        .intersection(market_returns.index)
        .intersection(small_minus_big_series.index)
        .intersection(high_minus_low_series.index)
    )

    # Fail early with a clear message instead of letting the weighting or
    # the regression break on empty inputs
    if common_index.empty:
        raise ValueError(
            "FamaFrenchFactors: the input series share no common dates."
        )

    y = stock_returns.loc[common_index]
    X = pd.concat(
        [
            market_returns.loc[common_index],
            small_minus_big_series.loc[common_index],
            high_minus_low_series.loc[common_index],
        ],
        axis=1,
    )

    # Create weights with exponential decay, scaled by the sample size.
    # NOTE(review): the second argument (T / 2) is presumably the decay
    # horizon expected by capm_toolkit.wexp -- confirm against that helper.
    T = len(y)
    weights = T * wexp(T, T / 2)

    # Fit WLS regression; rows with missing values are dropped by statsmodels
    model = sm.WLS(y, sm.add_constant(X), weights=weights, missing='drop').fit()

    # add_constant prepends the intercept, so the coefficients are read by
    # position: intercept first, then the factors in concatenation order.
    # This assumes none of the regressors is itself a constant column.
    params = model.params

    return {
        'alpha': params.iloc[0],
        'capm_beta': params.iloc[1],
        'smb_beta': params.iloc[2],
        'hml_beta': params.iloc[3],
    }

In [11]:
# Smoke-test the function on the excess returns currently held in
# stock_excess_returns; arguments are passed by name to make the factor
# order explicit
parameters = FamaFrenchFactors(
    stock_returns=stock_excess_returns,
    market_returns=market_excess_returns,
    small_minus_big_series=SMB,
    high_minus_low_series=HML,
)

parameters

### Obtaining the Coefficients for all the Stocks ###

In [12]:
# Dictionary to store the DataFrames
# The folder is assembled with os.path.join so it resolves on every
# platform: a literal backslash is only a path separator on Windows.
folder_path = os.path.join("..", "stocks")

# The second list gives the names under which the columns in the first
# list are referred to later ('adj_close' is read in the loop below).
# NOTE(review): presumably a positional rename -- confirm in functions.py.
dataframes = import_stock_universe(
    folder_path,
    ['Adjusted_close', 'Company Market Cap', 'Price_to_Book_inverse'],
    ['adj_close', 'mkt_cap', 'btp'],
)

In [14]:
# Estimate the three factor loadings of every stock in the universe
tickers = []
betas_capm = []
betas_smb = []
betas_hml = []

# NOTE: the loop reuses the notebook-level names ticker, stock_returns,
# stock_excess_returns and parameters, so after it runs they refer to the
# last stock processed.
for ticker, stock_data in dataframes.items():
    stock_returns = stock_data['adj_close'].pct_change(1).dropna()
    stock_excess_returns = stock_returns - daily_rfr

    # Calculations
    parameters = FamaFrenchFactors(
        stock_excess_returns,
        market_excess_returns,
        SMB,
        HML,
    )

    tickers.append(ticker)
    # FamaFrenchFactors returns the market loading under 'capm_beta';
    # it has no 'mkt_beta' key, so reading that key raised KeyError
    betas_capm.append(parameters['capm_beta'])
    betas_smb.append(parameters['smb_beta'])
    betas_hml.append(parameters['hml_beta'])

    print(f'{ticker} is ready.')

In [16]:
# Collect the per-stock loadings into one table indexed by ticker
beta_columns = {
    'mkt_beta': betas_capm,
    'smb_beta': betas_smb,
    'hml_beta': betas_hml,
}
ff_betas_df = pd.DataFrame(beta_columns, index=tickers)

ff_betas_df

In [17]:
# Distribution of the market betas across the stock universe
mkt_betas = ff_betas_df['mkt_beta']

# Calculate Mean, Standard Deviation and Median
mu = mkt_betas.mean()
sigma = mkt_betas.std()
median = mkt_betas.median()

# Create Histogram
plt.figure(figsize=(10, 6))
plt.hist(mkt_betas, bins=30, density=True, color='lightskyblue', alpha=0.5, edgecolor='black', label='CAPM Betas Distribution')

# Normal density with the sample mean and standard deviation, evaluated over
# the observed range of betas. Dedicated names are used so the regression
# inputs x and y defined earlier in the notebook are not overwritten.
beta_grid = np.linspace(mkt_betas.min(), mkt_betas.max(), 100)
normal_density = norm.pdf(beta_grid, mu, sigma)

# Graph the fitted Normal Distribution
plt.plot(beta_grid, normal_density, color='black', linestyle='solid', linewidth=2, label='Normal Distribution')

# Reference Lines (the plotted quantity is a beta, not a return)
plt.axvline(x=mu, color='black', linestyle='dashed', label='Mean Beta')
plt.axvline(x=median, color='red', linestyle='dashed', label='Median Beta')
plt.axvline(x=mu + sigma, color='grey', linestyle='dashed')
plt.axvline(x=mu - sigma, color='grey', linestyle='dashed')

# Config
plt.title('CAPM Betas Histogram with Normal Distribution')
plt.xlabel('Beta')
plt.ylabel('Density')

# Legends and Grid
plt.legend()
plt.grid(True)

# Show
plt.show()

In [18]:
# Distribution of the SMB betas across the stock universe
smb_betas = ff_betas_df['smb_beta']

# Calculate Mean, Standard Deviation and Median
mu = smb_betas.mean()
sigma = smb_betas.std()
median = smb_betas.median()

# Create Histogram
plt.figure(figsize=(10, 6))
plt.hist(smb_betas, bins=30, density=True, color='lightgreen', alpha=0.5, edgecolor='black', label='SMB Betas Distribution')

# Normal density with the sample mean and standard deviation, evaluated over
# the observed range of betas. Dedicated names are used so the regression
# inputs x and y defined earlier in the notebook are not overwritten.
beta_grid = np.linspace(smb_betas.min(), smb_betas.max(), 100)
normal_density = norm.pdf(beta_grid, mu, sigma)

# Graph the fitted Normal Distribution
plt.plot(beta_grid, normal_density, color='black', linestyle='solid', linewidth=2, label='Normal Distribution')

# Reference Lines (the plotted quantity is a beta, not a return)
plt.axvline(x=mu, color='black', linestyle='dashed', label='Mean Beta')
plt.axvline(x=median, color='red', linestyle='dashed', label='Median Beta')
plt.axvline(x=mu + sigma, color='grey', linestyle='dashed')
plt.axvline(x=mu - sigma, color='grey', linestyle='dashed')

# Config
plt.title('SMB Betas Histogram with Normal Distribution')
plt.xlabel('Beta')
plt.ylabel('Density')

# Legends and Grid
plt.legend()
plt.grid(True)

# Show
plt.show()

In [19]:
# Distribution of the HML betas across the stock universe
hml_betas = ff_betas_df['hml_beta']

# Calculate Mean, Standard Deviation and Median
mu = hml_betas.mean()
sigma = hml_betas.std()
median = hml_betas.median()

# Create Histogram
plt.figure(figsize=(10, 6))
plt.hist(hml_betas, bins=30, density=True, color='salmon', alpha=0.5, edgecolor='black', label='HML Betas Distribution')

# Normal density with the sample mean and standard deviation, evaluated over
# the observed range of betas. Dedicated names are used so the regression
# inputs x and y defined earlier in the notebook are not overwritten.
beta_grid = np.linspace(hml_betas.min(), hml_betas.max(), 100)
normal_density = norm.pdf(beta_grid, mu, sigma)

# Graph the fitted Normal Distribution
plt.plot(beta_grid, normal_density, color='black', linestyle='solid', linewidth=2, label='Normal Distribution')

# Reference Lines (the plotted quantity is a beta, not a return)
plt.axvline(x=mu, color='black', linestyle='dashed', label='Mean Beta')
plt.axvline(x=median, color='red', linestyle='dashed', label='Median Beta')
plt.axvline(x=mu + sigma, color='grey', linestyle='dashed')
plt.axvline(x=mu - sigma, color='grey', linestyle='dashed')

# Config
plt.title('HML Betas Histogram with Normal Distribution')
plt.xlabel('Beta')
plt.ylabel('Density')

# Legends and Grid
plt.legend()
plt.grid(True)

# Show
plt.show()

In [21]:
# Build the factor matrix used for the VIF computation
factor_frames = [market_excess_returns, SMB, HML]

# Place the factors side by side, keeping only the dates present in every
# input, then discard any row that still has a missing value
X = pd.concat(factor_frames, axis=1, join='inner').dropna()

# Short, uniform names for the three factor columns
X.columns = ['mkt', 'smb', 'hml']

X

In [23]:
# Variance inflation factor of each regressor.
# variance_inflation_factor regresses one column of the design matrix on the
# remaining columns and does not add an intercept on its own. The fitted
# model includes a constant, so the same constant is added here; without it
# the auxiliary regressions are forced through the origin.
vif_exog = sm.add_constant(X)

vif_data = pd.DataFrame()
vif_data['vars'] = X.columns
# The constant itself is not reported: only the original factor columns are
# looked up, by name, inside the augmented design matrix
vif_data['VIF'] = [
    variance_inflation_factor(vif_exog.values, vif_exog.columns.get_loc(column))
    for column in X.columns
]

vif_data

In [24]:
# Convert each VIF into the R-squared it implies, using VIF = 1 / (1 - R^2)
implied_r_squared = 1 - (1 / vif_data['VIF'])

# One row per factor: its name and the implied R-squared
r_squared_df = pd.DataFrame(
    {
        'vars': X.columns,
        'r_squared': implied_r_squared,
    }
)

r_squared_df

In [25]:
# Persist the estimated loadings; the path is assembled with os.path.join so
# it resolves on every platform (a literal backslash is only a path
# separator on Windows)
ff_betas_df.to_csv(os.path.join("..", "additional_data", "famafrench_betas.csv"))