# Downloading Stock Price Data

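This notebook downloads historical stock prices with the `yfinance` library and prepares daily and monthly price and return datasets. It then cleans the monthly Fama-French five-factor file and merges the factors with the monthly stock returns into a single panel for analysis.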


In [21]:
from pathlib import Path

import pandas as pd
import yfinance as yf

# Tickers to download.
tickers = ['AAPL', 'MSFT', 'GOOG', 'AMZN', 'META', 'TSLA', 'NVDA', 'JPM', 'UNH', 'HD']

# Output locations. Created up front so the to_csv calls below also work on a
# fresh checkout where the data directories do not exist yet.
RAW_DIR = Path('../data/raw')
PROCESSED_DIR = Path('../data/processed')
for out_dir in (RAW_DIR, PROCESSED_DIR):
    out_dir.mkdir(parents=True, exist_ok=True)

# Download close prices (auto_adjust=True).
# yfinance treats `end` as exclusive, so end="2024-12-31" would drop the
# 2024-12-31 session and the December 2024 month-end price would come from
# Dec 30. Using 2025-01-01 keeps the full 2014-2024 window.
prices = yf.download(tickers, start="2014-01-01", end="2025-01-01", auto_adjust=True)['Close']

# fill_method=None: compute returns from observed prices only. The implicit
# forward-fill default of pct_change is deprecated in pandas and would turn a
# missing price into a spurious 0% return instead of a row dropped by dropna().
daily_returns = prices.pct_change(fill_method=None).dropna()

# Save raw prices and returns
prices.to_csv(RAW_DIR / 'prices.csv')
daily_returns.to_csv(RAW_DIR / 'returns_daily.csv')

# Resample to monthly: last available price in each calendar month, labelled
# with the month-end date ('ME').
monthly_prices = prices.resample('ME').last()
monthly_returns = monthly_prices.pct_change(fill_method=None).dropna()
monthly_prices.to_csv(PROCESSED_DIR / 'prices_monthly.csv')
monthly_returns.to_csv(PROCESSED_DIR / 'returns_monthly.csv')

[*********************100%***********************]  10 of 10 completed


In [None]:
from pathlib import Path

# Location of the manually downloaded Fama-French 5-factor (monthly) CSV.
FF5_CSV = Path('../data/raw/F-F_Research_Data_5_Factors_2x3.csv')
if not FF5_CSV.exists():
    raise FileNotFoundError(
        f"Couldn't find {FF5_CSV}. Download the CSV named 'F-F_Research_Data_5_Factors_2x3.csv' (Monthly) from the Kenneth French Data Library"
    )

# Column names applied to the first seven columns of the file.
FF5_COLUMNS = ["Date", "Mkt-RF", "SMB", "HML", "RMW", "CMA", "RF"]

# read and clean
# The first three lines of the file are skipped before the header row is parsed.
raw = pd.read_csv(FF5_CSV, skiprows=3)

# Fail loudly if the file is narrower than expected: assigning a truncated
# name list would silently give columns the wrong factor names.
if raw.shape[1] < len(FF5_COLUMNS):
    raise ValueError(
        f"{FF5_CSV} has {raw.shape[1]} columns; expected at least {len(FF5_COLUMNS)} "
        f"({', '.join(FF5_COLUMNS)})"
    )

# Keep only rows whose first field is a six-digit YYYYMM token; every other
# row (anything that is not a monthly observation) is discarded.
date_tokens = raw.iloc[:, 0].astype(str).str.strip()
is_monthly = date_tokens.str.match(r"^\d{6}$", na=False)

# Build the cleaned frame in one chain so no column is assigned on a filtered
# slice (avoids pandas' SettingWithCopyWarning) and nothing is mutated in place.
raw = (
    raw.loc[is_monthly]
    .iloc[:, :len(FF5_COLUMNS)]
    .set_axis(FF5_COLUMNS, axis=1)
    .assign(
        # YYYYMM parses to the first day of that month.
        Date=lambda frame: pd.to_datetime(
            frame["Date"].astype(str).str.strip(), format="%Y%m"
        )
    )
    .set_index("Date")
)

# Divide by 100 before saving the cleaned factor table.
ff5 = raw.astype(float) / 100.0

ff5.to_csv("../data/processed/ff5_monthly_clean.csv")
ff5.head()


Unnamed: 0_level_0,Mkt-RF,SMB,HML,RMW,CMA,RF
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2025-01-01,0.028,-0.0122,0.0163,-0.0232,-0.0324,0.0037
2025-02-01,-0.0243,-0.0491,0.0491,0.011,0.0306,0.0033
2025-03-01,-0.0639,-0.0149,0.029,0.0211,-0.0047,0.0034
2025-04-01,-0.0084,-0.0186,-0.034,-0.0285,-0.0267,0.0035
2025-05-01,0.0606,-0.0072,-0.0288,0.0127,0.025,0.0038


In [None]:
# merge ff factors with stock returns on date
# The factor dates are parsed from YYYYMM and therefore fall on the first of
# the month, while the monthly returns are labelled with month-end dates
# (resample('ME')). MonthEnd(0) rolls each date forward to its month end and
# leaves dates already on a month end unchanged, so re-running this cell is safe.
ff5.index = ff5.index + pd.offsets.MonthEnd(0)

# Long format: one row per (Date, Ticker). The index levels are named
# explicitly so the join below does not depend on how yfinance labelled them.
panel = (
    monthly_returns.stack()
    .rename("ret")
    .rename_axis(["Date", "Ticker"])
    .to_frame()
)

# Join on the shared "Date" level; the inner join keeps only months present in
# both the returns and the factor table.
panel = panel.join(ff5.rename_axis("Date"), how="inner")
if panel.empty:
    # Otherwise an empty CSV would be written and the date range would print NaT.
    raise ValueError(
        "No overlapping months between monthly_returns and ff5; "
        "check that the factor file covers the stock return date range."
    )

panel["excess_ret"] = panel["ret"] - panel["RF"]

panel.to_csv("../data/processed/panel_monthly_with_ff5.csv")
panel_dates = panel.index.get_level_values(0)
print("Date range:", panel_dates.min(), "to", panel_dates.max())
panel.head()