In [3]:
import pandas as pd
import numpy as np
import pandas_datareader.data as reader
import datetime as dt
import statsmodels.api as sm

In [4]:
# Sample window (calendar year 2019) and the single ticker to analyse
start, end = dt.date(2019, 1, 1), dt.date(2019, 12, 31)
stock = ['TSLA']

In [5]:
# Preview the complete Yahoo table for TSLA (every price field plus volume)
reader.get_data_yahoo(stock, start=start, end=end)

Attributes,Adj Close,Close,High,Low,Open,Volume
Symbols,TSLA,TSLA,TSLA,TSLA,TSLA,TSLA
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
2019-01-02,62.023998,62.023998,63.026001,59.759998,61.220001,58293000
2019-01-03,60.071999,60.071999,61.880001,59.476002,61.400002,34826000
2019-01-04,63.537998,63.537998,63.599998,60.546001,61.200001,36970500
2019-01-07,66.991997,66.991997,67.348000,63.549999,64.344002,37756000
2019-01-08,67.070000,67.070000,68.802002,65.403999,68.391998,35042500
...,...,...,...,...,...,...
2019-12-24,85.050003,85.050003,85.094002,82.538002,83.671997,40273500
2019-12-26,86.188004,86.188004,86.695999,85.269997,85.582001,53169500
2019-12-27,86.075996,86.075996,87.061996,85.222000,87.000000,49728500
2019-12-30,82.940002,82.940002,85.800003,81.851997,85.758003,62932000


In [6]:
# Only the 'Adj Close' prices are needed for the return calculation
stock_prices = reader.get_data_yahoo(
    stock, start=start, end=end
)['Adj Close']
stock_prices.head()

Symbols,TSLA
Date,Unnamed: 1_level_1
2019-01-02,62.023998
2019-01-03,60.071999
2019-01-04,63.537998
2019-01-07,66.991997
2019-01-08,67.07


In [7]:
# Daily log return of the stock: ln(P_t / P_{t-1}).
# The first row has no previous price, so its return is NaN.
stock_prices['TSLAr'] = np.log(stock_prices['TSLA'].div(stock_prices['TSLA'].shift()))
stock_prices.head()

Symbols,TSLA,TSLAr
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-01-02,62.023998,
2019-01-03,60.071999,-0.031978
2019-01-04,63.537998,0.056094
2019-01-07,66.991997,0.052935
2019-01-08,67.07,0.001164


In [8]:
# Remove every row that still contains a NaN (the first day has no return)
stock_prices = stock_prices.dropna(axis='index', how='any')
stock_prices.head()

Symbols,TSLA,TSLAr
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-01-03,60.071999,-0.031978
2019-01-04,63.537998,0.056094
2019-01-07,66.991997,0.052935
2019-01-08,67.07,0.001164
2019-01-09,67.706001,0.009438


Getting the benchmark data and returns

In [9]:
# Benchmark series: the ^GSPC index; as with the stock, keep only 'Adj Close'
bench = ['^GSPC']
benchmark = reader.get_data_yahoo(
    bench, start=start, end=end
)['Adj Close']
benchmark.head()

Symbols,^GSPC
Date,Unnamed: 1_level_1
2019-01-02,2510.030029
2019-01-03,2447.889893
2019-01-04,2531.939941
2019-01-07,2549.689941
2019-01-08,2574.409912


In [10]:
# Daily log return of the benchmark: ln(P_t / P_{t-1})
benchmark['GSPCr'] = np.log(benchmark['^GSPC'].div(benchmark['^GSPC'].shift()))
# The first row has no previous price, so its return is NaN -- drop it
benchmark = benchmark.dropna(axis='index', how='any')
benchmark

Symbols,^GSPC,GSPCr
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-01-03,2447.889893,-0.025068
2019-01-04,2531.939941,0.033759
2019-01-07,2549.689941,0.006986
2019-01-08,2574.409912,0.009649
2019-01-09,2584.959961,0.004090
...,...,...
2019-12-24,3223.379883,-0.000195
2019-12-26,3239.909912,0.005115
2019-12-27,3240.020020,0.000034
2019-12-30,3221.290039,-0.005798


In [11]:
# Sanity check before merging: both frames should report the same (rows, columns) shape
print(stock_prices.shape, benchmark.shape, sep='\n')

(251, 2)
(251, 2)


Now let's merge the two datasets. Both are indexed by 'Date' in the same format, so they can be joined directly on that column.

In [12]:
# Inner-join the stock and benchmark frames on 'Date'
data = stock_prices.merge(benchmark, how='inner', on='Date')
data.head()

Symbols,TSLA,TSLAr,^GSPC,GSPCr
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-01-03,60.071999,-0.031978,2447.889893,-0.025068
2019-01-04,63.537998,0.056094,2531.939941,0.033759
2019-01-07,66.991997,0.052935,2549.689941,0.006986
2019-01-08,67.07,0.001164,2574.409912,0.009649
2019-01-09,67.706001,0.009438,2584.959961,0.00409


In [2]:
# beta: market beta used for the Tesla stock
# rf:   daily risk-free rate, i.e. 0.02 (presumably an annual rate) spread over 365 days
beta, rf = 1.42, 0.02 / 365

$$R_{it} - R_{ft} = \alpha_{it} + \beta_1 (R_{Mt} - R_{ft}) + \beta_2 \, SMB_t + \beta_3 \, HML_t + \epsilon_{it}$$

In [None]:
# Build `dataset`, the regression table. No earlier cell defines it, so on a
# fresh kernel the original two lines failed with a NameError. It is the merged
# TSLA / ^GSPC returns in `data` plus the daily Fama-French SMB and HML factors
# and the risk-free rate RF.
ff_factors = reader.DataReader(
    'F-F_Research_Data_Factors_daily', 'famafrench', start, end
)[0]
if isinstance(ff_factors.index, pd.PeriodIndex):
    # `data` is indexed by timestamps; convert so the two indexes can be joined
    ff_factors.index = ff_factors.index.to_timestamp()
# The Fama-French library quotes its factors in percent, whereas the log
# returns computed above are plain decimals.
ff_factors = ff_factors[['SMB', 'HML', 'RF']] / 100

# Keep only the trading days present in both sources
dataset = data.join(ff_factors, how='inner')
assert not dataset.empty, "no overlapping dates between price data and Fama-French factors"

# Excess returns over the risk-free rate: the stock (left-hand side of the
# regression) and the market benchmark (first factor)
dataset['TSLA-RF'] = dataset['TSLAr'] - dataset['RF']
dataset['RMt-Rft'] = dataset['GSPCr'] - dataset['RF']

In [None]:
# Inspect the regression table, including the 'TSLA-RF' and 'RMt-Rft' excess-return columns
dataset

In [None]:
# Set up the Fama-French three-factor regression

# Dependent variable: the stock's return in excess of the risk-free rate
y = dataset.loc[:, 'TSLA-RF']

# Explanatory (independent) variables: market excess return plus the SMB and HML factors
X = dataset.loc[:, ['RMt-Rft', 'SMB', 'HML']]

# Prepend a constant column so the fit also estimates the intercept (alpha)
X_sm = sm.add_constant(X, prepend=True)

In [None]:
# Ordinary least squares of y on the constant and the three factors
model = sm.OLS(endog=y, exog=X_sm)
results = model.fit()
results.summary()