In [53]:
# import libraries
import pandas as pd
import statsmodels.api as sm
import yfinance as yf
import numpy as np
import datetime

# Get the data for the stock Apple by specifying the stock ticker, start date, and end date
ticker = ['MSFT']
market = ['^GSPC']

today = datetime.datetime.now().strftime('%Y-%m-%m')
fiveYear = (datetime.datetime.now() - datetime.timedelta(days=1825)).strftime('%Y-%m-%m')

fbdata = yf.download(tickers=ticker, start = fiveYear, end = today)
fbdataPd = pd.DataFrame(fbdata)
fb = fbdataPd[['Adj Close']]

marketdata = yf.download(tickers=market, start = fiveYear, end = today)
marketdataPd = pd.DataFrame(marketdata)
sp_500 = marketdataPd[['Adj Close']]

# joining the closing prices of the two datasets 
monthly_prices = pd.concat([fb['Adj Close'], sp_500['Adj Close']], axis=1)
monthly_prices.columns = ['MSFT', '^GSPC']

# check the head of the dataframe
print(monthly_prices.head())

# calculate monthly returns
monthly_returns = monthly_prices.pct_change(1)
clean_monthly_returns = monthly_returns.dropna(axis=0)  # drop first missing row
print(clean_monthly_returns.head())

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
                 MSFT        ^GSPC
Date                              
2015-07-07  40.092571  2081.340088
2015-07-08  40.038265  2046.680054
2015-07-09  40.291676  2051.310059
2015-07-10  40.373127  2076.620117
2015-07-13  41.214806  2099.600098
                MSFT     ^GSPC
Date                          
2015-07-08 -0.001355 -0.016653
2015-07-09  0.006329  0.002262
2015-07-10  0.002022  0.012338
2015-07-13  0.020847  0.011066
2015-07-14  0.001756  0.004453


In [54]:
# split dependent and independent variable
X = clean_monthly_returns['^GSPC']
y = clean_monthly_returns['MSFT']

# Add a constant to the independent value
X1 = sm.add_constant(X)

# make regression model 
model = sm.OLS(y, X1)

# fit model and print results
results = model.fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                   MSFT   R-squared:                       0.697
Model:                            OLS   Adj. R-squared:                  0.696
Method:                 Least Squares   F-statistic:                     2883.
Date:                Thu, 16 Jul 2020   Prob (F-statistic):               0.00
Time:                        20:43:32   Log-Likelihood:                 4065.7
No. Observations:                1258   AIC:                            -8127.
Df Residuals:                    1256   BIC:                            -8117.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0010      0.000      3.640      0.0

In [49]:
# alternatively scipy linear regression
from scipy import stats
slope, intercept, r_value, p_value, std_err = stats.linregress(X, y)

print(slope)

1.1860634796696605
