In [7]:
# Optional one-time setup: uncomment the lines below to install the
# third-party packages that the import cell of this notebook relies on.
#!pip install stargazer
#!pip install causaldata
#!pip install statsmodels

In [1]:
import pandas as pd
import statsmodels.formula.api as sm
from stargazer.stargazer import Stargazer
from causaldata import restaurant_inspections
import matplotlib.pyplot as plt
from IPython.display import HTML

# Apply the 'fivethirtyeight' matplotlib style sheet to figures drawn from here on.
plt.style.use("fivethirtyeight")

# Install a global "ignore" filter so warnings are not printed in cell outputs.
# NOTE(review): this blanket filter hides every warning category, including
# ones that may matter for the regressions below -- confirm this is intended.
import warnings
warnings.filterwarnings(action="ignore")

In [11]:
# Load the restaurant-inspections dataset object, then keep its .data
# attribute (the table previewed in the next cell) under the name `res`.
inspections_bundle = restaurant_inspections.load_pandas()
res = inspections_bundle.data

In [12]:
# Preview the first five rows to check the columns loaded as expected
res.head(5)

Unnamed: 0,business_name,inspection_score,Year,NumberofLocations,Weekend
0,MCGINLEYS PUB,94,2017,9,False
1,VILLAGE INN #1,86,2015,66,False
2,RONNIE SUSHI 2,80,2016,79,False
3,FRED MEYER - RETAIL FISH,96,2003,86,False
4,PHO GRILL,83,2017,53,False


13.1.7 Coding up Regression

In [21]:
# Model 1: regress inspection score on the number of locations alone.
# In the formula string, the outcome goes left of ~ and the predictors go right.
single_predictor_spec = sm.ols(
    data=res,
    formula='inspection_score ~ NumberofLocations',
)
m1 = single_predictor_spec.fit()

# Show the full fit summary as a quick sanity check
m1.summary()

0,1,2,3
Dep. Variable:,inspection_score,R-squared:,0.065
Model:,OLS,Adj. R-squared:,0.065
Method:,Least Squares,F-statistic:,1877.0
Date:,"Mon, 12 Dec 2022",Prob (F-statistic):,0.0
Time:,17:49:04,Log-Likelihood:,-87492.0
No. Observations:,27178,AIC:,175000.0
Df Residuals:,27176,BIC:,175000.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,94.8657,0.046,2049.047,0.000,94.775,94.956
NumberofLocations,-0.0189,0.000,-43.321,0.000,-0.020,-0.018

0,1,2,3
Omnibus:,2842.496,Durbin-Watson:,1.932
Prob(Omnibus):,0.0,Jarque-Bera (JB):,3871.323
Skew:,-0.856,Prob(JB):,0.0
Kurtosis:,3.7,Cond. No.,134.0


In [22]:
# Model 2: the same regression with Year included as a control.
# Additional right-hand-side terms are chained with +.
year_control_spec = sm.ols(
    data=res,
    formula='inspection_score ~ NumberofLocations + Year',
)
m2 = year_control_spec.fit()

# Show the full fit summary as a quick sanity check
m2.summary()

0,1,2,3
Dep. Variable:,inspection_score,R-squared:,0.068
Model:,OLS,Adj. R-squared:,0.068
Method:,Least Squares,F-statistic:,997.4
Date:,"Mon, 12 Dec 2022",Prob (F-statistic):,0.0
Time:,17:49:05,Log-Likelihood:,-87437.0
No. Observations:,27178,AIC:,174900.0
Df Residuals:,27175,BIC:,174900.0
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,225.3327,12.411,18.156,0.000,201.006,249.659
NumberofLocations,-0.0192,0.000,-44.033,0.000,-0.020,-0.018
Year,-0.0649,0.006,-10.512,0.000,-0.077,-0.053

0,1,2,3
Omnibus:,2828.999,Durbin-Watson:,1.931
Prob(Omnibus):,0.0,Jarque-Bera (JB):,3861.424
Skew:,-0.849,Prob(JB):,0.0
Kurtosis:,3.726,Cond. No.,681000.0


In [24]:
# Give Stargazer the list of models we want side by side and render the table
# BEFORE touching the output file, so that a failure while building or
# rendering cannot leave behind an empty, truncated regression_table.html.
regtable = Stargazer([m1, m2])
regtable_html = regtable.render_html()

# Save the rendered HTML. The with-block closes the file handle even if the
# write raises, and the explicit encoding keeps the output independent of the
# platform's default locale encoding.
with open('regression_table.html', 'w', encoding='utf-8') as f:
    f.write(regtable_html)

In [2]:
# Load the saved regression table from disk and render it inline as HTML
HTML(filename="regression_table.html")

0,1,2
,,
,Dependent variable:inspection_score,Dependent variable:inspection_score
,,
,(1),(2)
,,
Intercept,94.866***,225.333***
,(0.046),(12.411)
NumberofLocations,-0.019***,-0.019***
,(0.000),(0.000)
Year,,-0.065***


13.2.5 Coding up Polynomials and Interactions

# Use I() to insert calculations of your variables and ** to square.
# Wrapping the term in I() makes the formula parser evaluate
# NumberofLocations**2 as ordinary Python arithmetic instead of reading **
# as a formula operator.
# NOTE(review): the original cell was an unfinished `sm.ols(formula)` call
# (undefined name, no data, no fit); the right-hand side below is a
# reconstruction from the comment above -- confirm the intended controls.
# This rebinds m1, replacing the one-predictor model fitted earlier.
m1 = sm.ols(
    formula='inspection_score ~ NumberofLocations + I(NumberofLocations**2) + Year',
    data=res,
).fit()
