In [19]:
import pandas as pd
import yfinance as yf
import statsmodels.formula.api as smf
import pandas_datareader.data as web
from datetime import date as dt

In [20]:
# Notebook parameters: the ticker to analyse and the date window used for
# every download below (factor data and price history).
START_DATE = '2013-03-31'   # first date requested, as an ISO date string
END_DATE = dt.today()       # evaluated at run time, so results depend on the run date
RISKY_ASSET = 'AXP'         # ticker symbol of the security being regressed

In [21]:
# Download both factor datasets from the "famafrench" reader over the same
# date window: first the three research factors, then the momentum factor.
# Each call returns a collection of tables; element 0 is the one kept here
# (the monthly series, judging by the displayed output).
factor_3_df, momentum_df = (
    web.DataReader(dataset_name, "famafrench", start=START_DATE, end=END_DATE)[0]
    for dataset_name in ("F-F_Research_Data_Factors", "F-F_Momentum_Factor")
)

In [22]:
# Peek at the three factors and the risk-free rate from Professor French's website
factor_3_df

Unnamed: 0_level_0,Mkt-RF,SMB,HML,RF
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2013-03,4.03,0.81,-0.19,0.00
2013-04,1.55,-2.36,0.45,0.00
2013-05,2.80,1.73,2.63,0.00
2013-06,-1.20,1.33,0.03,0.00
2013-07,5.65,1.86,0.57,0.00
...,...,...,...,...
2022-10,7.83,0.09,8.05,0.23
2022-11,4.60,-3.40,1.38,0.29
2022-12,-6.41,-0.68,1.32,0.33
2023-01,6.65,5.02,-4.05,0.35


In [23]:
# Peek at the momentum factor
momentum_df

Unnamed: 0_level_0,Mom
Date,Unnamed: 1_level_1
2013-03,1.92
2013-04,0.22
2013-05,-2.02
2013-06,0.52
2013-07,1.76
...,...
2022-10,3.87
2022-11,-2.01
2022-12,4.52
2023-01,-15.96


In [24]:
# Fetch the asset's price history for the configured window.
# auto_adjust=True requests adjusted prices (the output has no separate
# "Adj Close" column); progress=False silences the download progress bar.
df = yf.download(
    tickers=[RISKY_ASSET],
    start=START_DATE,
    end=END_DATE,
    auto_adjust=True,
    progress=False,
)
# Show the six most recent rows as a sanity check.
df.tail(6)

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-04-06,160.630005,160.789993,158.229996,158.830002,3754500
2023-04-10,158.039993,161.169998,157.800003,161.139999,2372600
2023-04-11,161.669998,162.580002,160.320007,161.830002,2742300
2023-04-12,163.220001,163.220001,158.869995,159.289993,2817600
2023-04-13,159.800003,162.419998,158.979996,162.300003,2729000
2023-04-14,164.240005,164.770004,161.460007,163.220001,2617400


In [25]:
# Monthly simple returns of the asset:
#   1. take the last available close in each calendar month,
#   2. compute the month-over-month percentage change,
#   3. drop the first month, which has no prior month to compare against.
# NOTE: because END_DATE is today's date, the final month is month-to-date,
# not a complete month.
# NOTE(review): newer pandas releases prefer 'ME' over 'M' as the resample
# alias; 'M' is kept for the pandas version this notebook was run with -
# confirm before upgrading.
y = (
    df['Close']
    .resample('M')
    .last()
    .pct_change()
    .dropna()
)

# Re-label the month-end timestamps as monthly periods (e.g. 2023-03) so the
# index lines up with the period-indexed factor tables when joined later.
# "M" is the canonical monthly period alias (the lowercase form is non-standard).
y.index = y.index.to_period("M")
y.name = "ret"
y.iloc[-6:]

Date
2022-11    0.061569
2022-12   -0.062440
2023-01    0.188063
2023-02   -0.005374
2023-03   -0.051957
2023-04   -0.006822
Freq: M, Name: ret, dtype: float64

In [26]:
# Build the regression table: three factors + risk-free rate, then momentum,
# then the asset's monthly return, all aligned on the shared monthly index.
factor_4_df = factor_3_df.join(momentum_df).join(y)

# Columns are renamed by position, in join order.
factor_4_df.columns = ["mkt", "smb", "hml", "rf", "mom", "rtn"]

# The factor and risk-free columns are quoted in percent (e.g. 4.03), while
# the asset return is already a decimal fraction, so rescale all but "rtn".
percent_cols = ["mkt", "smb", "hml", "rf", "mom"]
factor_4_df[percent_cols] = factor_4_df[percent_cols].div(100)

# Excess return = asset return minus the risk-free rate.
factor_4_df["excess_rtn"] = factor_4_df["rtn"].sub(factor_4_df["rf"])
factor_4_df["excess_rtn"]

Date
2013-03         NaN
2013-04         NaN
2013-05    0.106710
2013-06   -0.012548
2013-07   -0.010233
             ...   
2022-10    0.102264
2022-11    0.058669
2022-12   -0.065740
2023-01    0.184563
2023-02   -0.008774
Freq: M, Name: excess_rtn, Length: 120, dtype: float64

In [27]:
# Four-factor regression: explain the asset's excess return with the market,
# size (smb), value (hml) and momentum (mom) factors via ordinary least squares.
# The summary below reports 118 observations, i.e. the rows whose excess
# return is NaN are not used in the fit.
four_factor_spec = smf.ols("excess_rtn ~ mkt + smb + hml + mom", data=factor_4_df)
four_factor_model = four_factor_spec.fit()
print(four_factor_model.summary())

                            OLS Regression Results                            
Dep. Variable:             excess_rtn   R-squared:                       0.607
Model:                            OLS   Adj. R-squared:                  0.593
Method:                 Least Squares   F-statistic:                     43.57
Date:                Sat, 15 Apr 2023   Prob (F-statistic):           4.47e-22
Time:                        01:28:20   Log-Likelihood:                 197.38
No. Observations:                 118   AIC:                            -384.8
Df Residuals:                     113   BIC:                            -370.9
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0022      0.004      0.499      0.6