## Welcome

This is material for the **Instrumental Variables** chapter in Scott Cunningham's book, [Causal Inference: The Mixtape.](https://mixtape.scunning.com/)

In [None]:
!pip install -q linearmodels
!pip install -q rpy2

In [None]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.iolib.summary2 import summary_col
from linearmodels.iv import IV2SLS
from tabulate import tabulate

In [None]:
import ssl
# NOTE(review): this replaces the factory used for default HTTPS contexts with
# the *unverified* one (both are private `ssl` attributes), so certificate
# verification is switched off for everything in this process that relies on
# the default context. Presumably a workaround so the data download below
# succeeds on machines with certificate problems -- confirm it is still needed.
ssl._create_default_https_context = ssl._create_unverified_context

def read_data(file):
    """Read a Stata file from the Mixtape GitHub repository.

    Parameters
    ----------
    file : str
        File name appended to the repository's raw ``master`` URL,
        e.g. ``"card.dta"``.

    Returns
    -------
    The object returned by ``pd.read_stata`` for the resulting URL.
    """
    base_url = "https://raw.github.com/scunning1975/mixtape/master/"
    return pd.read_stata(base_url + file)

In [None]:
def lm_robust(formula, data, group_col):
    """Fit an OLS formula model with cluster-robust covariance.

    Parameters
    ----------
    formula : str
        Model formula handed to ``sm.OLS.from_formula``.
    data : DataFrame
        Data the formula is evaluated against.
    group_col : str
        Column of ``data`` whose values define the clusters.

    Returns
    -------
    The fitted results object produced by ``.fit(cov_type="cluster", ...)``.
    """
    model = sm.OLS.from_formula(formula, data=data)
    clusters = data[group_col]
    return model.fit(cov_type="cluster", cov_kwds={"groups": clusters})

## Card

In [None]:
# Load the Card data set through the read_data helper.
card = read_data("card.dta")

# OLS: log wage regressed on education plus controls, with the default
# covariance estimator (no cov_type is passed to fit()).
ols_formula = "lwage ~ educ + exper + black + south + married + smsa"
ols_model = sm.OLS.from_formula(ols_formula, data=card)
ols_reg = ols_model.fit()

print(ols_reg.summary())

In [None]:
# Keep only rows where `married` is observed. Reassigning the result instead of
# mutating with inplace=True keeps the step explicit and safe to re-run.
card = card.dropna(subset=['married'])

In [None]:
# 2SLS: educ is the endogenous regressor, instrumented by nearc4; the other
# terms (including the explicit constant `1`) enter as exogenous regressors.
iv_formula = "lwage ~ 1 + exper + black + married + south + smsa + [educ ~ nearc4]"
iv_model = IV2SLS.from_formula(iv_formula, card)
iv_reg = iv_model.fit()
print(iv_reg.summary)

#### Questions
- Interpret the coefficient on education when we used OLS versus when we used 2SLS. 
- How does the estimated effect of education change when instrumenting with being close to a 4-year college?  That is, does the coefficient get larger or smaller compared to OLS?
- If the only source of bias in our OLS regression were omitted heterogeneous ability, would the 2SLS estimate be larger than, smaller than, or the same as the OLS estimate?  Why or why not?   
- Is the 2SLS estimate of the causal effect of education, when compared to the OLS estimate, consistent with ability bias?  What else do you think may be going on and why?
- What sorts of individuals will go to college regardless of whether a college is near them?  What sorts of individuals will never go to a college even if one is near them?  And what sorts of people will go to a college if one is near them but won't go to college if it is not near them?

## JIVE 

In [None]:
judge = read_data("judge_fe.dta")

# Re-express bailDate as the number of whole days since 1970-01-01.
epoch = pd.to_datetime('1970-01-01')
judge['bailDate'] = (judge['bailDate'] - epoch).dt.days.values

# Regressor groups, each stored as a "+"-joined string ready to be spliced
# into a formula. judge_pre collects the columns matching the pattern below.
is_judge_pre = judge.columns.str.contains('^judge_pre_[1-7]')
judge_pre = "+".join(judge.columns[is_judge_pre])
demo = "+".join(['black', 'age', 'male', 'white'])
off = "+".join(['fel', 'mis', 'sum', 'F1', 'F2', 'F3', 'M1', 'M2', 'M3', 'M'])
prior = "+".join(['priorCases', 'priorWI5', 'prior_felChar', 'prior_guilt', 'onePrior', 'threePriors'])
control2 = "+".join(['day', 'day2', 'bailDate', 't1', 't2', 't3', 't4', 't5'])

# OLS formulas: jail3 plus only the control2 terms, and jail3 plus every group.
min_formula = "guilt ~ jail3 + " + control2
max_template = """guilt ~ jail3 + possess + robbery + DUI1st + drugSell + 
                aggAss + {demo} + {prior} + {off} + {control2}"""
max_formula = max_template.format(demo=demo, prior=prior, off=off, control2=control2)

# Fit both specifications and keep the jail3 coefficient to three decimals.
min_ols = sm.OLS.from_formula(min_formula, data=judge).fit()
max_ols = sm.OLS.from_formula(max_formula, data=judge).fit()
min_ols_est = round(min_ols.params['jail3'], 3)
max_ols_est = round(max_ols.params['jail3'], 3)

# Side-by-side table; jail3 first, then the control2 terms in their own order.
models = [min_ols, max_ols]
model_names = ['Min. Variables', 'Max. variables']
summary_col(
    models,
    float_format='%0.3f',
    model_names=model_names,
    regressor_order=('jail3', *control2.split('+')),
)

In [None]:
#--- Instrumental variables estimation
#-- 2SLS main results
#- Formulas with minimum and maximum controls; jail3 is instrumented by the
#  judge_pre columns in both.
# NOTE(review): unlike the Card 2SLS formula, these have no explicit `1 +`
# term -- confirm whether linearmodels adds an intercept implicitly.
iv_parts = dict(demo=demo, prior=prior, off=off, control2=control2, judge_pre=judge_pre)

min_formula = "guilt ~ {control2} + [jail3 ~ {judge_pre}]".format(**iv_parts)
max_template = """guilt ~ {demo} + possess + {prior} + robbery + {off} + DUI1st + {control2} + drugSell + aggAss +
                    [jail3 ~ {judge_pre}]"""
max_formula = max_template.format(**iv_parts)

min_iv = IV2SLS.from_formula(min_formula, data=judge).fit()
max_iv = IV2SLS.from_formula(max_formula, data=judge).fit()

# jail3 coefficient from each fit, rounded to three decimals.
min_iv_est = round(min_iv.params['jail3'], 3)
max_iv_est = round(max_iv.params['jail3'], 3)

In [None]:
# Show the full results table of the 2SLS fit stored in min_iv.
print(min_iv.summary)

In [None]:
# Show the full results table of the 2SLS fit stored in max_iv.
print(max_iv.summary)

In [None]:
%%capture
from rpy2 import robjects
from rpy2.robjects import pandas2ri
from rpy2.robjects.packages import importr
pandas2ri.activate()
utils = importr('utils')
utils.install_packages('SteinIV')
SteinIV = importr('SteinIV')

In [None]:
#-- JIVE main results
#- minimum controls
# Controls shared by the regressor matrix (X) and the instrument matrix (Z);
# the list order fixes the column order of both.
min_controls = ['day', 'day2', 't1', 't2', 't3', 't4', 't5', 'bailDate']

y = judge['guilt']

# X: jail3 followed by the controls and a constant column.
X_min = judge.loc[:, ['jail3'] + min_controls]
X_min['intercept'] = 1

# Z: the judge_pre columns followed by the same controls and constant.
Z_min = judge.loc[:, judge_pre.split('+') + min_controls]
Z_min['intercept'] = 1

In [None]:
# Publish the outcome and both design matrices in R's global environment.
# X_min and Z_min are converted to plain NumPy arrays first, and the Python
# names are rebound to those arrays.
robjects.globalenv['y'] = y

X_min = np.array(X_min)
robjects.globalenv['X_min'] = X_min

Z_min = np.array(Z_min)
robjects.globalenv['Z_min'] = Z_min

In [None]:
# Run SteinIV's jive_est and keep the first entry of the first element of its
# result -- presumably the coefficient on the first column of X_min; confirm
# against the SteinIV documentation.
min_jive_fit = SteinIV.jive_est(y=y, X=X_min, Z=Z_min)
min_jive = min_jive_fit[0][0]

In [None]:
# Controls shared by the regressor matrix (X) and the instrument matrix (Z) in
# the maximum-controls specification; the list order fixes the column order.
max_controls = [
    'white', 'age', 'male', 'black',
    'possess', 'robbery',
    'prior_guilt', 'onePrior', 'priorWI5', 'prior_felChar', 'priorCases',
    'DUI1st', 'drugSell', 'aggAss', 'fel', 'mis', 'sum',
    'threePriors',
    'F1', 'F2', 'F3',
    'M', 'M1', 'M2', 'M3',
    'day', 'day2', 'bailDate',
    't1', 't2', 't3', 't4', 't5',
]

# X: jail3 followed by the controls and a constant column.
X_max = judge.loc[:, ['jail3'] + max_controls]
X_max['intercept'] = 1

# Z: the judge_pre columns followed by the same controls and constant.
Z_max = judge.loc[:, judge_pre.split('+') + max_controls]
Z_max['intercept'] = 1

# Convert to NumPy arrays and publish both in R's global environment.
X_max = np.array(X_max)
robjects.globalenv['X_max'] = X_max

Z_max = np.array(Z_max)
robjects.globalenv['Z_max'] = Z_max

In [None]:
# Run SteinIV's jive_est and keep the first entry of the first element of its
# result -- presumably the coefficient on the first column of X_max; confirm
# against the SteinIV documentation.
max_jive_fit = SteinIV.jive_est(y=y, X=X_max, Z=Z_max)
max_jive = max_jive_fit[0][0]

In [None]:
# Round both JIVE values to three decimals and store them as Python floats.
min_jive_est, max_jive_est = (
    float(np.round(estimate, 3)) for estimate in (min_jive, max_jive)
)

print("Min. JIVE, Max. JIVE")
print(min_jive_est, max_jive_est)

In [None]:
# Report all estimates: one row per specification, one column per estimator.
estimates = [[min_ols_est, min_iv_est, min_jive_est], [max_ols_est, max_iv_est, max_jive_est]]
col_names = ["OLS", "2SLS", "JIVE"]
# Label the rows so the reader can tell the two specifications apart; without
# labels the table is just two anonymous rows of numbers.
row_names = ["Min. controls", "Max. controls"]
print(tabulate(estimates, headers = col_names, showindex = row_names, tablefmt="fancy_grid"))

#### QUESTION
- Interpret the coefficients from our two IV estimators.  How do they compare to our OLS estimates?
- What is your conclusion about the effect that cash bail has on adjudication?  Speculate about the channels by which cash bail has this effect. 
- Describe the four sub-populations (i.e., always takers, never takers, defiers and compliers) in the context of Stevenson's study.
- Discuss the plausibility of each of the 5 IV assumptions in Stevenson's case.  
- Draw a DAG that must be true for Stevenson's JIVE estimates to be consistent.  Which assumptions are contained in this DAG and which ones are not easily visualized? 
- Assume judge A is stricter than judge B.  Monotonicity requires that, whatever bail amount judge B sets for an individual, judge A would hypothetically always set a higher amount for that same individual.  Provide some examples where you think this may be violated.  


