In [1]:
import pandas as pd
import yfinance as yf
import statsmodels.api as sm

In [2]:
# Tickers: each one is renamed to a role-based column once prices are downloaded.
RISKY_ASSET = 'VTI'       # asset whose beta is estimated   -> column 'asset'
MARKET_BENCHMARK = 'SPY'  # market benchmark                -> column 'market'
MKT_RF = 'SHY'            # risk-free rate proxy            -> column 'mkt_rf'

# Sample window passed to the price download.
START_DATE, END_DATE = '2013-01-24', '2023-01-24'

In [3]:
# Download price history for the asset, the benchmark and the risk-free rate proxy.
# auto_adjust=False is passed explicitly: recent yfinance releases default to
# auto_adjust=True, which removes the 'Adj Close' column selected below.
df = yf.download(
    [RISKY_ASSET, MARKET_BENCHMARK, MKT_RF],
    start=START_DATE,
    end=END_DATE,
    auto_adjust=False,
    progress=False,
)

# Last adjusted close of each month -> simple monthly returns, one column per series.
# pd.offsets.MonthEnd() is the offset behind the 'M' alias; newer pandas deprecates
# that alias in favour of 'ME', while the offset object works on old and new versions.
X = (
    df['Adj Close']
    .rename(columns={RISKY_ASSET: 'asset', MARKET_BENCHMARK: 'market', MKT_RF: 'mkt_rf'})
    .resample(pd.offsets.MonthEnd())
    .last()
    .pct_change()
    .dropna()
)

# A failed download leaves all-NaN columns, which dropna() turns into an empty frame;
# stop here instead of letting later cells compute on nothing.
assert not X.empty, 'no monthly returns computed - check tickers and date range'

In [4]:
# Turn raw monthly returns into excess returns over the risk-free proxy.
rf_series = X['mkt_rf']  # SHY return series
X1 = X.sub(rf_series, axis='index')  # subtract it row by row from every column
X2 = X1.drop(columns='mkt_rf')  # the proxy's own column is now all zeros, so drop it

In [5]:
# Covariance method: beta = Cov(asset, market) / Var(market), on excess returns.
# Columns are selected by label rather than by position in the covariance matrix,
# so the result does not depend on column order and a missing column raises a
# KeyError instead of silently producing a wrong number.
covariance = X2['asset'].cov(X2['market'])
benchmark_variance = X2['market'].var()
beta = covariance / benchmark_variance
beta

1.0242160367647135

In [6]:
# CAPM regression: excess asset return = const + beta * excess market return.
# X2 is only read here (no pop / rebinding), so this cell can be re-run and the
# covariance-method cell keeps working afterwards.
y = X2['asset']  # dependent variable: asset excess-return series
X_design = sm.add_constant(X2[['market']])  # regressors: intercept + benchmark excess return
capm_model = sm.OLS(y, X_design).fit()  # ordinary least squares fit
capm_model.summary()  # the 'market' coefficient should match beta from the covariance method


0,1,2,3
Dep. Variable:,asset,R-squared:,0.992
Model:,OLS,Adj. R-squared:,0.992
Method:,Least Squares,F-statistic:,14230.0
Date:,"Sun, 26 Mar 2023",Prob (F-statistic):,7.190000000000001e-125
Time:,22:34:05,Log-Likelihood:,492.95
No. Observations:,120,AIC:,-981.9
Df Residuals:,118,BIC:,-976.3
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.0005,0.000,-1.224,0.223,-0.001,0.000
market,1.0242,0.009,119.290,0.000,1.007,1.041

0,1,2,3
Omnibus:,0.459,Durbin-Watson:,1.9
Prob(Omnibus):,0.795,Jarque-Bera (JB):,0.604
Skew:,-0.119,Prob(JB):,0.739
Kurtosis:,2.746,Cond. No.,23.4
