# Regression Using StatsModels
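- Run an OLS regression of FB (ticker: META) daily returns on SPY daily returns to estimate FB's beta and alpha
- $FB_t = \beta \cdot SPY_t + \alpha + e_t$, where $FB_t$ and $SPY_t$ are the returns on day $t$ and $e_t$ is the residual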

In [5]:
import statsmodels.api as sm
import pandas as pd
import numpy as np
import yfinance as yf 

# pip install yfinance
# https://pypi.org/project/yfinance/

In [3]:
!pip install statsmodels

Defaulting to user installation because normal site-packages is not writeable
Collecting statsmodels
[0m  Downloading statsmodels-0.14.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (10.8 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.8/10.8 MB[0m [31m877.9 kB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m[36m0:00:01[0m
Collecting patsy>=0.5.6
  Downloading patsy-0.5.6-py2.py3-none-any.whl (233 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.9/233.9 KB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m MB/s[0m eta [36m0:00:01[0m
Installing collected packages: patsy, statsmodels
Successfully installed patsy-0.5.6 statsmodels-0.14.2


In [6]:
# Universe: the market benchmark (SPY) and the stock being regressed (META).
univ = ['SPY', 'META']
# Download the price history from Yahoo Finance, then keep only the
# adjusted closing prices.
history = yf.download(univ, start="2016-01-01")
px = history['Adj Close']

[*********************100%%**********************]  2 of 2 completed


In [7]:
# Inspect the downloaded prices (one column per ticker, indexed by date).
px

Ticker,META,SPY
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-01-04,102.010376,173.483521
2016-01-05,102.519333,173.776932
2016-01-06,102.758835,171.584915
2016-01-07,97.719193,167.468323
2016-01-08,97.130409,165.630112
...,...,...
2024-07-31,474.829987,550.809998
2024-08-01,497.739990,543.010010
2024-08-02,488.140015,532.900024
2024-08-05,475.730011,517.380005


In [9]:
# Simple returns: each price divided by the previous row's price, minus one.
# The first row has no previous price, so its return is NaN.
ret = px.div(px.shift(1)).sub(1)
ret

Ticker,META,SPY
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-01-04,,
2016-01-05,0.004989,0.001691
2016-01-06,0.002336,-0.012614
2016-01-07,-0.049043,-0.023992
2016-01-08,-0.006025,-0.010976
...,...,...
2024-07-31,0.025130,0.016255
2024-08-01,0.048249,-0.014161
2024-08-02,-0.019287,-0.018618
2024-08-05,-0.025423,-0.029124


In [10]:
# Response variable: META returns.
Y = ret['META']

# Regressors: SPY returns, kept as a one-column DataFrame (double brackets).
X = ret[['SPY']]
print(X)

# The regression needs an explicit constant column; its coefficient is the
# intercept (alpha) of the fit.
X = sm.add_constant(X)
print(X)

Ticker           SPY
Date                
2016-01-04       NaN
2016-01-05  0.001691
2016-01-06 -0.012614
2016-01-07 -0.023992
2016-01-08 -0.010976
...              ...
2024-07-31  0.016255
2024-08-01 -0.014161
2024-08-02 -0.018618
2024-08-05 -0.029124
2024-08-06  0.016304

[2162 rows x 1 columns]
Date
2016-01-04         NaN
2016-01-05    0.001691
2016-01-06   -0.012614
2016-01-07   -0.023992
2016-01-08   -0.010976
                ...   
2024-07-31    0.016255
2024-08-01   -0.014161
2024-08-02   -0.018618
2024-08-05   -0.029124
2024-08-06    0.016304
Name: SPY, Length: 2162, dtype: float64


In [None]:
# Print the regressors and then the response to eyeball them before fitting.
for data in (X, Y):
    print(data)

In [None]:
# Demonstration: fitting on the raw returns fails because the first row is NaN
# (there is no previous price to compute a return from).
# The error is caught and printed so the notebook still runs top to bottom;
# any other, unexpected exception is left to propagate.
from statsmodels.tools.sm_exceptions import MissingDataError

try:
    results = sm.OLS(Y, X).fit()
except MissingDataError as err:
    print(f"{type(err).__name__}: {err}")

In [None]:
# Drop incomplete rows jointly so X and Y stay aligned on the same dates.
# Calling dropna() on X and Y separately would leave them with different rows
# whenever a return is missing for only one of the two tickers.
# X and Y are both derived from `ret`, so they share the same index and the
# boolean mask lines up with each of them.
complete = X.notna().all(axis=1) & Y.notna()
X = X.loc[complete]
Y = Y.loc[complete]

# Ordinary least squares of META returns on [const, SPY].
results = sm.OLS(Y, X).fit()

In [None]:
# check summary: regression report for the fitted model, shown as the cell output
results.summary()

In [None]:
# get params: fitted coefficients, looked up below as 'const' (alpha) and 'SPY' (beta)
results.params

In [None]:
# get tvalues: t-statistic of each fitted coefficient
results.tvalues

Reconstruct the FB returns from the fitted coefficients and the residuals
- $FB_t = \beta \cdot SPY_t + \alpha + e_t$

In [None]:
# Rebuild the META return series term by term: beta * SPY + alpha + residual.
coef = results.params
FB = coef['SPY'] * X['SPY'] + coef['const'] + results.resid

In [None]:
# Total absolute gap between the rebuilt series and the actual returns;
# the two are basically identical, so this should be ~0.
FB.sub(Y).abs().sum()

In [None]:
# Split each day's return into the part explained by SPY and the rest.
coef = results.params
beta_contr = coef['SPY'] * X['SPY']             # market-driven part: beta * SPY
prediction = beta_contr + coef['const']         # fitted value: beta * SPY + alpha
alpha_contr = coef['const'] + results.resid     # unexplained part: alpha + residual

In [None]:
# Per-date return not explained by SPY: the intercept plus that date's residual.
alpha_contr

In [None]:
# Running total of the daily alpha contribution, plotted against its index.
cum_alpha = alpha_contr.cumsum()
cum_alpha.plot()

In [None]:
# The intercept-plus-residual series of an OLS fit is uncorrelated with the
# regressor by construction, so this should be zero up to floating-point noise.
spy_ret = X['SPY']
alpha_contr.corr(spy_ret)

In [None]:
# "alpha"
# Mean of (intercept + residual). OLS residuals average to ~0 when a constant
# is included, so this should match results.params['const'].
alpha_contr.mean()

In [None]:
# information ratio: per-period mean over per-period standard deviation of the
# alpha contribution, scaled by sqrt(252) to annualize
# (252 is presumably the trading-day count per year).
daily_ratio = alpha_contr.mean() / alpha_contr.std()
daily_ratio * np.sqrt(252)

In [None]:
# alpha t-stat: t-statistic of the intercept ('const') coefficient
results.tvalues['const']