<a href="https://colab.research.google.com/github/jeanmhuang/quant-projects/blob/main/Fama_French.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [16]:
import yfinance as yf
import pandas as pd

# Download full dataframe for AAPL
data = yf.download('AAPL', start='2020-01-01', end='2024-01-01')

# Check available columns
print(data.columns)
data.head()

# Extract closing prices
close = data['Close']

# Calculate daily returns
returns = close.pct_change().dropna()
returns.name = 'AAPL'
returns.head()




[*********************100%***********************]  1 of 1 completed

MultiIndex([( 'Close', 'AAPL'),
            (  'High', 'AAPL'),
            (   'Low', 'AAPL'),
            (  'Open', 'AAPL'),
            ('Volume', 'AAPL')],
           names=['Price', 'Ticker'])





Ticker,AAPL
Date,Unnamed: 1_level_1
2020-01-03,-0.009722
2020-01-06,0.007968
2020-01-07,-0.004703
2020-01-08,0.016086
2020-01-09,0.021241


In [17]:
# Load Fama-French 3-factor daily data from Kenneth French's site
import zipfile
import io
import requests
import pandas as pd

url = "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors_daily_CSV.zip"

# Download and extract zip file
response = requests.get(url)
z = zipfile.ZipFile(io.BytesIO(response.content))
file_name = z.namelist()[0]

# Read the CSV inside the zip and clean it
df_factors = pd.read_csv(z.open(file_name), skiprows=3)
df_factors.rename(columns={df_factors.columns[0]: 'Date'}, inplace=True)

# Remove non-date footer rows
df_factors = df_factors[df_factors['Date'].str.len() == 8]
df_factors['Date'] = pd.to_datetime(df_factors['Date'], format='%Y%m%d')
df_factors.set_index('Date', inplace=True)

# Convert % strings to decimal floats
df_factors = df_factors.apply(pd.to_numeric, errors='coerce') / 100

# Rename columns
df_factors.rename(columns={
    'Mkt-RF': 'Market',
    'SMB': 'Size',
    'HML': 'Value',
    'RF': 'RiskFree'
}, inplace=True)

df_factors.head()

Unnamed: 0_level_0,Market,Size,Value,RiskFree
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1926-07-01,0.0009,-0.0025,-0.0027,9e-05
1926-07-02,0.0045,-0.0033,-0.0006,9e-05
1926-07-06,0.0017,0.003,-0.0039,9e-05
1926-07-07,0.0009,-0.0058,0.0002,9e-05
1926-07-08,0.0022,-0.0038,0.0019,9e-05


In [18]:
# Merge AAPL returns and Fama-French factors
df = pd.concat([returns, df_factors], axis=1).dropna()

# Create excess return column
df['Excess'] = df['AAPL'] - df['RiskFree']
df.head()

Unnamed: 0_level_0,AAPL,Market,Size,Value,RiskFree,Excess
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-01-03,-0.009722,-0.0067,0.0038,0.0001,6e-05,-0.009782
2020-01-06,0.007968,0.0036,-0.0007,-0.0055,6e-05,0.007908
2020-01-07,-0.004703,-0.0019,-0.0001,-0.0026,6e-05,-0.004763
2020-01-08,0.016086,0.0047,-0.0007,-0.0064,6e-05,0.016026
2020-01-09,0.021241,0.0065,-0.0063,-0.0048,6e-05,0.021181


In [19]:
import statsmodels.api as sm

# Define X (independent variables) and y (dependent variable)
X = df[['Market', 'Size', 'Value']]
X = sm.add_constant(X)  # adds intercept term
y = df['Excess']

# Run OLS regression
model = sm.OLS(y, X).fit()
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:                 Excess   R-squared:                       0.726
Model:                            OLS   Adj. R-squared:                  0.725
Method:                 Least Squares   F-statistic:                     883.5
Date:                Thu, 05 Jun 2025   Prob (F-statistic):          1.06e-280
Time:                        03:12:28   Log-Likelihood:                 3100.3
No. Observations:                1005   AIC:                            -6193.
Df Residuals:                    1001   BIC:                            -6173.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0006      0.000      1.669      0.0