In [14]:
# 1.1 Import Packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from datetime import datetime
import yfinance as yf
import warnings
import pandas_datareader.data as web


# Silence only FutureWarnings whose text starts with "The argument 'date_parser'"
# (`message` is a regex matched against the beginning of the warning text).
# NOTE(review): presumably raised while pandas_datareader parses the Fama-French
# file further down -- confirm the source before relying on this filter.
warnings.filterwarnings('ignore', category=FutureWarning, message='The argument \'date_parser\'')

# pandas_datareader.data is already imported above as `web`; the factor download
# further down uses web.DataReader, so no separate DataReader import is needed.

# Render matplotlib figures inline in the notebook output
%matplotlib inline

In [15]:
# 2.1 Define Tickers
tickers = ["AAPL", "MSFT", "AMZN", "TSLA"]  # example set
start_date = "2017-01-01"
end_date = "2022-12-31"

# 2.2 Fetch Price Data
# Download every ticker in one batched request rather than one request per ticker.
# Building the frame column-by-column from an empty DataFrame pins the index to
# the first ticker's trading days and silently drops dates it lacks; the batched
# result keeps the union of all trading days instead.
raw_prices = yf.download(tickers, start=start_date, end=end_date, auto_adjust=True)

# With auto_adjust=True the "Close" field is the adjusted close.
close_prices = raw_prices["Close"]
if isinstance(close_prices, pd.Series):
    # A single-ticker request can come back with flat columns, in which case
    # "Close" is a Series; normalise it to a one-column frame.
    close_prices = close_prices.to_frame(name=tickers[0])

# Restore the order given in `tickers` (the batched result is not guaranteed to
# preserve it) and drop the columns-axis label so the table displays plainly.
df_prices = close_prices.reindex(columns=tickers).rename_axis(columns=None)

# 2.3 Inspect
df_prices.head()

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,AAPL,MSFT,AMZN,TSLA
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017-01-03,26.891966,56.714634,37.683498,14.466
2017-01-04,26.861856,56.460876,37.859001,15.132667
2017-01-05,26.998461,56.460876,39.022499,15.116667
2017-01-06,27.29945,56.950256,39.7995,15.267333
2017-01-09,27.5495,56.769001,39.846001,15.418667


In [16]:
# 3.1 Resample Daily Prices to Monthly
# Take the last available daily close inside each month-end ("ME") bin.
df_prices_monthly = df_prices.resample("ME").last()

# 3.2 Compute Simple Returns
# fill_method=None: do not forward-fill missing prices before differencing.
# The implicit default ('pad') is deprecated in pandas and would turn a missing
# month into a spurious 0% return; with None the gap stays NaN and is removed by
# dropna() together with the first (undefined) month.
df_returns = df_prices_monthly.pct_change(fill_method=None).dropna()

df_returns.head()

Unnamed: 0_level_0,AAPL,MSFT,AMZN,TSLA
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017-02-28,0.133778,-0.004364,0.026182,-0.007701
2017-03-31,0.04869,0.029384,0.04911,0.113245
2017-04-30,-7e-05,0.039478,0.043371,0.12853
2017-05-31,0.067807,0.026005,0.075276,0.085777
2017-06-30,-0.057214,-0.01303,-0.026764,0.060409


In [17]:
# 4.1 Download the Fama-French research factors through pandas_datareader
# (requires pandas_datareader to be installed and network access)

ff_data = web.DataReader(
    name="F-F_Research_Data_Factors",
    data_source="famafrench",
    start=start_date,
    end=end_date,
)
# The reader hands back a dictionary of tables; entry 0 is typically the monthly one.
# Values are quoted in percent, so they are rescaled in the preprocessing step.
df_factors_raw = ff_data[0].copy()

df_factors_raw.head()


Unnamed: 0_level_0,Mkt-RF,SMB,HML,RF
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017-01,1.94,-1.18,-2.75,0.04
2017-02,3.57,-2.05,-1.67,0.04
2017-03,0.17,1.14,-3.35,0.03
2017-04,1.09,0.73,-2.13,0.05
2017-05,1.06,-2.57,-3.78,0.06


In [18]:
# 4.2 Preprocess Factor Data
# One pipeline from the raw download to the analysis-ready frame:
#   1. keep the three factors plus the risk-free rate,
#   2. rescale from percent to decimal,
#   3. turn the monthly Period index into month-end timestamps,
#   4. trim to the span covered by the stock returns.
df_factors = (
    df_factors_raw.loc[:, ["Mkt-RF", "SMB", "HML", "RF"]]
    .div(100.0)
    .to_timestamp(freq="M")
    .loc[df_returns.index.min():df_returns.index.max()]
)

df_factors.head()


Unnamed: 0_level_0,Mkt-RF,SMB,HML,RF
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017-02-28,0.0357,-0.0205,-0.0167,0.0004
2017-03-31,0.0017,0.0114,-0.0335,0.0003
2017-04-30,0.0109,0.0073,-0.0213,0.0005
2017-05-31,0.0106,-0.0257,-0.0378,0.0006
2017-06-30,0.0078,0.0225,0.0148,0.0006


In [None]:
# 5.2.1 Prepare data for a single ticker
ticker = "AAPL"

# Get the stock's monthly returns
stock_returns = df_returns[ticker]

# Prepare the factor columns we want: (Mkt-RF), SMB, HML
X_factors = df_factors[["Mkt-RF","SMB","HML"]].copy()

# Dependent variable: stock excess return (R_i - RF).
# The subtraction aligns on the union of both indices, so the result can carry
# months that have no factor row. Reindex it onto the regressors' index so that
# endog and exog line up row for row, and name it so the regression summary
# labels the dependent variable instead of showing a bare "y".
y_excess = (
    (stock_returns - df_factors["RF"])
    .reindex(X_factors.index)
    .rename(f"{ticker}_excess")
)

# 5.2.2 Build the regression
# We add a constant for alpha
X_factors_const = sm.add_constant(X_factors)

# missing="drop" discards any month where the return or a factor is NaN rather
# than letting NaNs propagate into the estimates.
model = sm.OLS(y_excess, X_factors_const, missing="drop").fit()
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.573
Model:                            OLS   Adj. R-squared:                  0.547
Method:                 Least Squares   F-statistic:                     22.16
Date:                Fri, 17 Jan 2025   Prob (F-statistic):           1.25e-11
Time:                        19:40:11   Log-Likelihood:                 101.43
No. Observations:                  71   AIC:                            -192.9
Df Residuals:                      66   BIC:                            -181.5
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0244      0.011      2.208      0.0

In [20]:
# Estimate the three-factor regression for every ticker and collect the
# coefficients into one table (rows = tickers, columns = alpha and betas).

# Regressors are identical for every ticker, so build them once, straight from
# df_factors rather than from a variable left over by an earlier cell.
factor_design = sm.add_constant(df_factors[["Mkt-RF", "SMB", "HML"]])

# statsmodels parameter name -> column label in the output table
coef_labels = {
    "const": "Alpha",
    "Mkt-RF": "Beta_MktRF",
    "SMB": "Beta_SMB",
    "HML": "Beta_HML",
}

params_by_ticker = {}
for tck in tickers:
    # Excess return, aligned row for row with the regressors; months missing on
    # either side become NaN and are dropped by missing="drop".
    excess_tck = (df_returns[tck] - df_factors["RF"]).reindex(factor_design.index)
    res = sm.OLS(excess_tck, factor_design, missing="drop").fit()
    params_by_ticker[tck] = res.params

# Assemble the table in one step so the columns are float64. Pre-allocating an
# empty frame and filling it with .loc leaves object-dtype columns, which breaks
# later numeric operations and plotting.
factor_loadings = (
    pd.DataFrame(params_by_ticker)
    .T
    .rename(columns=coef_labels)
    .reindex(index=tickers, columns=list(coef_labels.values()))
    .astype(float)
)

factor_loadings

Unnamed: 0,Alpha,Beta_MktRF,Beta_SMB,Beta_HML
AAPL,0.012079,1.276775,-0.389183,-0.544205
MSFT,0.01099,0.964371,-0.387432,-0.439909
AMZN,0.000498,1.273908,-0.265077,-0.8976
TSLA,0.025445,2.021545,-0.016182,-1.146664
