In [24]:
import pandas as pd
import statsmodels.api as sm
import yfinance as yf
from pandas_datareader import data as pdr
from statsmodels.stats.diagnostic import het_breuschpagan
from statsmodels.stats.outliers_influence import variance_inflation_factor

In [25]:
# Sample window for the monthly price history
start_date = '2013-01-01'
end_date = '2023-01-01'

# auto_adjust=False keeps the separate 'Adj Close' column that the later cells index.
# Newer yfinance releases default auto_adjust to True and then omit that column,
# so the flag is pinned explicitly to keep the notebook runnable across versions.
aapl = yf.download('AAPL', start=start_date, end=end_date, interval='1mo', auto_adjust=False)
sp500 = yf.download('^GSPC', start=start_date, end=end_date, interval='1mo', auto_adjust=False)

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


In [26]:
# Calculate monthly returns from the adjusted closes and combine them in one dataframe

# pct_change() has no prior month for the first row, so that row is NaN and is dropped.
# dropna() is chained (returns a new object) instead of mutating with inplace=True.
aapl_returns = aapl['Adj Close'].pct_change().dropna()
sp500_returns = sp500['Adj Close'].pct_change().dropna()

# join='inner' keeps only the dates present in both series, so a date mismatch
# between the two downloads cannot introduce NaN rows into the regression data.
merged_data = pd.concat([aapl_returns, sp500_returns], axis=1, join='inner')
merged_data.columns = ['AAPL', 'SP500']

merged_data

Unnamed: 0_level_0,AAPL,SP500
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2013-02-01,-0.030934,0.011061
2013-03-01,0.008700,0.035988
2013-04-01,0.000271,0.018086
2013-05-01,0.015696,0.020763
2013-06-01,-0.112457,-0.014999
...,...,...
2022-08-01,-0.032552,-0.042440
2022-09-01,-0.119756,-0.093396
2022-10-01,0.109551,0.079863
2022-11-01,-0.034629,0.053753


In [27]:
# Single-factor regression: AAPL monthly returns on S&P 500 monthly returns
y = merged_data['AAPL']

# add_constant adds an intercept column so the fit estimates a constant term too
X = sm.add_constant(merged_data['SP500'])

model = sm.OLS(endog=y, exog=X).fit()

print(model.summary())

                            OLS Regression Results                            
Dep. Variable:                   AAPL   R-squared:                       0.452
Model:                            OLS   Adj. R-squared:                  0.447
Method:                 Least Squares   F-statistic:                     96.45
Date:                Wed, 25 Sep 2024   Prob (F-statistic):           5.76e-17
Time:                        12:13:44   Log-Likelihood:                 167.45
No. Observations:                 119   AIC:                            -330.9
Df Residuals:                     117   BIC:                            -325.3
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0109      0.006      1.941      0.0

In [28]:
# Add 10-year Treasury note yields as a second regressor to obtain coefficients on both the market and Treasuries

In [29]:
# Download 10-year Treasury note yields (^TNX); these are yield levels, not returns.
# auto_adjust=False keeps the 'Adj Close' column used below available on newer
# yfinance releases, which otherwise omit it by default.
tnx = yf.download('^TNX', start=start_date, end=end_date, interval='1mo', auto_adjust=False)

# Merge the yields into the existing dataframe.
# Selecting only the two return columns first makes this cell safe to re-run:
# without it, a second run would concatenate onto a frame that already has TNX
# and the three-name column assignment below would fail on four columns.
merged_data = pd.concat([merged_data[['AAPL', 'SP500']], tnx['Adj Close']], axis=1)
merged_data.columns = ['AAPL', 'SP500', 'TNX']

# Drop any month that is missing a return or a yield
merged_data = merged_data.dropna()

merged_data


[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,AAPL,SP500,TNX
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2013-02-01,-0.030934,0.011061,1.888
2013-03-01,0.008700,0.035988,1.852
2013-04-01,0.000271,0.018086,1.675
2013-05-01,0.015696,0.020763,2.164
2013-06-01,-0.112457,-0.014999,2.478
...,...,...,...
2022-08-01,-0.032552,-0.042440,3.133
2022-09-01,-0.119756,-0.093396,3.804
2022-10-01,0.109551,0.079863,4.077
2022-11-01,-0.034629,0.053753,3.703


In [30]:
# Two-factor regression: AAPL returns on S&P 500 returns and the 10-year yield
regressors = ['SP500', 'TNX']

y = merged_data['AAPL']
X = sm.add_constant(merged_data[regressors])  # include an intercept term

model = sm.OLS(endog=y, exog=X).fit()

print(model.summary())

                            OLS Regression Results                            
Dep. Variable:                   AAPL   R-squared:                       0.468
Model:                            OLS   Adj. R-squared:                  0.459
Method:                 Least Squares   F-statistic:                     50.98
Date:                Wed, 25 Sep 2024   Prob (F-statistic):           1.30e-16
Time:                        12:13:44   Log-Likelihood:                 169.20
No. Observations:                 119   AIC:                            -332.4
Df Residuals:                     116   BIC:                            -324.1
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0420      0.018      2.386      0.0

In [31]:
# Assess multicollinearity with pairwise Pearson correlations.
# The matrix covers every column of merged_data (the dependent variable AAPL too);
# the SP500/TNX entry is the one that speaks to collinearity between the regressors.
correlation_matrix = merged_data.corr(method='pearson')
correlation_matrix

Unnamed: 0,AAPL,SP500,TNX
AAPL,1.0,0.672199,-0.213157
SP500,0.672199,1.0,-0.130942
TNX,-0.213157,-0.130942,1.0


In [32]:
# Variance inflation factor for each column of the design matrix X (constant included)

vif_values = [variance_inflation_factor(X.values, idx) for idx in range(X.shape[1])]
vif_data = pd.DataFrame({'Feature': X.columns, 'VIF': vif_values})
vif_data

Unnamed: 0,Feature,VIF
0,const,10.522952
1,SP500,1.017445
2,TNX,1.017445


In [33]:
# Test for heteroskedasticity, then re-estimate the regression using robust standard errors

# Breusch-Pagan test on the fitted model's residuals against its regressors.
# Null hypothesis: the residual variance is constant (homoskedasticity);
# a small p-value is evidence of heteroskedasticity.
bp_lm, bp_lm_pvalue, bp_fvalue, bp_f_pvalue = het_breuschpagan(model.resid, model.model.exog)
print(f'Breusch-Pagan LM statistic: {bp_lm:.4f} (p-value: {bp_lm_pvalue:.4f})')
print(f'Breusch-Pagan F statistic:  {bp_fvalue:.4f} (p-value: {bp_f_pvalue:.4f})')

# Coefficients are unchanged; only the covariance (and hence the standard errors,
# t-statistics and confidence intervals) is recomputed with the HC3 estimator.
results_adjusted = model.get_robustcov_results(cov_type='HC3')
print(results_adjusted.summary())

                            OLS Regression Results                            
Dep. Variable:                   AAPL   R-squared:                       0.468
Model:                            OLS   Adj. R-squared:                  0.459
Method:                 Least Squares   F-statistic:                     68.31
Date:                Wed, 25 Sep 2024   Prob (F-statistic):           2.48e-20
Time:                        12:13:44   Log-Likelihood:                 169.20
No. Observations:                 119   AIC:                            -332.4
Df Residuals:                     116   BIC:                            -324.1
Df Model:                           2                                         
Covariance Type:                  HC3                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0420      0.019      2.201      0.0

In [34]:
# Recompute the standard errors with a HAC covariance estimator so that inference
# allows for autocorrelation in the residuals (up to maxlags lags; one lag here)

results_robust = model.get_robustcov_results(
    cov_type='HAC',
    maxlags=1,
)
print(results_robust.summary())

                            OLS Regression Results                            
Dep. Variable:                   AAPL   R-squared:                       0.468
Model:                            OLS   Adj. R-squared:                  0.459
Method:                 Least Squares   F-statistic:                     71.22
Date:                Wed, 25 Sep 2024   Prob (F-statistic):           6.64e-21
Time:                        12:13:44   Log-Likelihood:                 169.20
No. Observations:                 119   AIC:                            -332.4
Df Residuals:                     116   BIC:                            -324.1
Df Model:                           2                                         
Covariance Type:                  HAC                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0420      0.018      2.306      0.0