In [1]:
import statsmodels.api as sm
import numpy as np
import pandas_datareader as web
import yfinance as yf
import pandas as pd
from datetime import datetime

def regress3fff_yf(ticker, start=None, end=None):
    """
    Linear regression of the fama french 3 factor model using a given yahoo finance ticker.
    If providing a date, the YYYY-MM-DD format will work. If a date is not provided, the code will default to the last month of the Fama French data for the end month and the first whole month of the applicable ticker for the start month.
    
    """
    
    df1 = yf.download(ticker,
                      start=start,
                      end=end,
                      progress=False)
    df1min = df1.index.min() 
    df1minm = pd.Period(df1min, freq='M').end_time.date() 

    ff_data = pd.read_csv("https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors_CSV.zip", 
                          skiprows=3)
    ff_data.rename(columns={0:'Date'}, inplace=True)
    ff_data.columns = ff_data.columns.str.replace('Unnamed: 0','Date')
    ff_data = ff_data[~ff_data.astype(str).apply(lambda x: x.str.contains('Annual Factors: January-December')).any(axis=1)]
    ff_data = ff_data[ff_data['Date'].str.strip().str.len() == 6]
    max_date = ff_data['Date'].max()
    ff_data.set_index('Date', inplace=True)
    ff_data = ff_data.loc[:max_date]
    ff_data.index = pd.to_datetime(ff_data.index, format="%Y%m").to_period('M')
    ff_data = ff_data.apply(pd.to_numeric, errors='coerce')
    ff_data = ff_data / 100
    
    if start is None:
        start = str(df1min.date())
    
    prices = df1.loc[df1minm:, ['Adj Close']]
    returns_d = prices / prices.shift(1) - 1
    returns_m = returns_d.resample('M').apply(lambda x: (x + 1).prod() - 1).to_period('M')
    
    # Adjust the date range for Fama-French data to match the mutual fund data
    if end is None:
        end = str(max_date)
    ff_data = ff_data.loc[start:end]

    eg_excess = returns_m[start:end] - ff_data.loc[start:end, ['RF']].values
    mkt_excess = ff_data.loc[start:end,['Mkt-RF']]
    exp_var = mkt_excess.copy()
    exp_var["Constant"] = 1
    exp_var["Value"] = ff_data.loc[start:end,['HML']]
    exp_var["Size"] = ff_data.loc[start:end,['SMB']]
    
    lm = sm.OLS(eg_excess, exp_var).fit()
    return lm.summary()

In [2]:
regress3fff_yf('NSBRX', start='2007-04-01', end='2023-05-31')

0,1,2,3
Dep. Variable:,Adj Close,R-squared:,0.928
Model:,OLS,Adj. R-squared:,0.926
Method:,Least Squares,F-statistic:,811.3
Date:,"Tue, 25 Jul 2023",Prob (F-statistic):,5.11e-108
Time:,18:39:15,Log-Likelihood:,596.47
No. Observations:,194,AIC:,-1185.0
Df Residuals:,190,BIC:,-1172.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Mkt-RF,0.8595,0.018,47.067,0.000,0.824,0.896
Constant,0.0007,0.001,0.904,0.367,-0.001,0.002
Value,0.0891,0.024,3.700,0.000,0.042,0.137
Size,-0.2053,0.035,-5.929,0.000,-0.274,-0.137

0,1,2,3
Omnibus:,17.223,Durbin-Watson:,2.134
Prob(Omnibus):,0.0,Jarque-Bera (JB):,35.728
Skew:,0.398,Prob(JB):,1.75e-08
Kurtosis:,4.946,Cond. No.,43.5


In [3]:
regress3fff_yf('NSBRX')

0,1,2,3
Dep. Variable:,Adj Close,R-squared:,0.925
Model:,OLS,Adj. R-squared:,0.923
Method:,Least Squares,F-statistic:,828.8
Date:,"Tue, 25 Jul 2023",Prob (F-statistic):,1.39e-113
Time:,18:39:29,Log-Likelihood:,637.94
No. Observations:,207,AIC:,-1268.0
Df Residuals:,203,BIC:,-1255.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Mkt-RF,0.8600,0.018,47.661,0.000,0.824,0.896
Constant,0.0012,0.001,1.506,0.134,-0.000,0.003
Value,0.0941,0.024,3.980,0.000,0.047,0.141
Size,-0.2232,0.034,-6.625,0.000,-0.290,-0.157

0,1,2,3
Omnibus:,15.02,Durbin-Watson:,2.164
Prob(Omnibus):,0.001,Jarque-Bera (JB):,22.67
Skew:,0.442,Prob(JB):,1.19e-05
Kurtosis:,4.359,Cond. No.,44.1
