<a href="https://colab.research.google.com/github/hmelberg/causal/blob/master/3%20Regression%20discontinuity.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#  Regression Discontinuity

Example: Did the introduction of the Uber taxi ervice reduce the number of DUI incidents?

Link for data and some comments (external) http://austinclemens.com/blog/2014/06/08/436/

## Import key modules

In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import statsmodels.formula.api as smf

## Import data to a dataframe (called df)

In [3]:
datafile = "https://raw.githubusercontent.com/hmelberg/causal/master/data/uber_replication.csv"
df = pd.read_csv(datafile)
#df = pd.read_csv('uber_replication.csv')

## Have a look at the data

In [4]:
df.head(5)

Unnamed: 0,marijuana,dayofweek,uber,eventdate,incidents
0,0,Sat,0,0,1
1,0,Sun,0,1,2
2,0,Mon,0,2,1
3,0,Tue,0,3,0
4,0,Wed,0,4,1


In [None]:
df.columns


Index(['marijuana', 'dayofweek', 'uber', 'eventdate', 'incidents'], dtype='object')

## Run regression

In [None]:
model = 'incidents ~ uber + marijuana + C(dayofweek) + eventdate'

In [None]:
wide_window = smf.ols(formula=model, data=df).fit()

In [None]:
wide_window.summary()

0,1,2,3
Dep. Variable:,incidents,R-squared:,0.351
Model:,OLS,Adj. R-squared:,0.347
Method:,Least Squares,F-statistic:,85.03
Date:,"Mon, 29 Oct 2018",Prob (F-statistic):,3.5e-126
Time:,23:42:02,Log-Likelihood:,-3632.0
No. Observations:,1422,AIC:,7284.0
Df Residuals:,1412,BIC:,7337.0
Df Model:,9,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,5.5765,0.282,19.763,0.000,5.023,6.130
C(dayofweek)[T.Mon],-2.5918,0.310,-8.362,0.000,-3.200,-1.984
C(dayofweek)[T.Sat],2.1619,0.310,6.984,0.000,1.555,2.769
C(dayofweek)[T.Sun],1.8781,0.310,6.059,0.000,1.270,2.486
C(dayofweek)[T.Thu],-1.3857,0.310,-4.470,0.000,-1.994,-0.778
C(dayofweek)[T.Tue],-2.7464,0.310,-8.860,0.000,-3.354,-2.138
C(dayofweek)[T.Wed],-2.2409,0.310,-7.230,0.000,-2.849,-1.633
uber,-0.6770,0.334,-2.025,0.043,-1.333,-0.021
marijuana,1.4383,0.361,3.984,0.000,0.730,2.146

0,1,2,3
Omnibus:,55.135,Durbin-Watson:,1.428
Prob(Omnibus):,0.0,Jarque-Bera (JB):,74.141
Skew:,0.388,Prob(JB):,7.95e-17
Kurtosis:,3.806,Cond. No.,6130.0


In [None]:
df.uber[380:400]

380    0
381    0
382    0
383    0
384    0
385    0
386    0
387    0
388    0
389    0
390    0
391    1
392    1
393    1
394    1
395    1
396    1
397    1
398    1
399    1
Name: uber, dtype: int64

# Change bandwidth (window)

## Focus only on the days around when uber was introduced

In [None]:
df[380:400]

Unnamed: 0,marijuana,dayofweek,uber,eventdate,incidents
380,0,Mon,0,380,7
381,0,Tue,0,381,2
382,0,Wed,0,382,9
383,0,Thu,0,383,5
384,0,Fri,0,384,7
385,0,Sat,0,385,10
386,0,Sun,0,386,9
387,0,Mon,0,387,1
388,0,Tue,0,388,3
389,0,Wed,0,389,6


In [None]:
# try window of 100 days before and after here. Feel free to try other dates
data_window= df.query('390.5 - 100 < eventdate < 390.5 + 100')
other_window = smf.ols(formula=model, data=data_window).fit()
other_window.summary()

  return np.sqrt(eigvals[0]/eigvals[-1])
  return self.params / self.bse
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


0,1,2,3
Dep. Variable:,incidents,R-squared:,0.396
Model:,OLS,Adj. R-squared:,0.371
Method:,Least Squares,F-statistic:,15.68
Date:,"Tue, 30 Oct 2018",Prob (F-statistic):,1.16e-17
Time:,00:05:27,Log-Likelihood:,-457.69
No. Observations:,200,AIC:,933.4
Df Residuals:,191,BIC:,963.1
Df Model:,8,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,6.0356,2.087,2.891,0.004,1.918,10.153
C(dayofweek)[T.Mon],-0.9692,0.647,-1.498,0.136,-2.245,0.307
C(dayofweek)[T.Sat],3.4838,0.641,5.434,0.000,2.219,4.748
C(dayofweek)[T.Sun],2.6369,0.647,4.076,0.000,1.361,3.913
C(dayofweek)[T.Thu],-0.5099,0.641,-0.795,0.428,-1.775,0.755
C(dayofweek)[T.Tue],-2.0753,0.647,-3.208,0.002,-3.352,-0.799
C(dayofweek)[T.Wed],-0.9247,0.641,-1.442,0.151,-2.190,0.340
uber,-0.7561,0.691,-1.094,0.275,-2.119,0.607
marijuana,0,0,,,0,0

0,1,2,3
Omnibus:,2.669,Durbin-Watson:,2.154
Prob(Omnibus):,0.263,Jarque-Bera (JB):,2.777
Skew:,-0.003,Prob(JB):,0.249
Kurtosis:,3.577,Cond. No.,inf
