In [1]:
import numpy as np
import pandas as pd

In [2]:

def generate_iv_data(n=1000, random_seed=42):
    """Simulate a dataset for an instrumental-variables (IV) demonstration.

    Data-generating process:
        instrument ~ Bernoulli(0.5)
        treatment  ~ Bernoulli(0.2 + 0.3 * instrument)
        X1         ~ Normal(mean=0, sd=1)
        X2         ~ Normal(mean=5, sd=2)
        outcome    = 10 + 2*treatment + 0.5*X1 + 0.7*X2 + Normal(0, 1)

    The true treatment effect is therefore +2. Note that treatment is drawn
    from the instrument alone: it does not depend on X1, X2, or the outcome
    noise in this simulation.

    Randomness comes from a private ``np.random.RandomState`` so the global
    NumPy RNG is left untouched. The draws are identical to what
    ``np.random.seed(random_seed)`` followed by the module-level
    ``np.random.*`` calls would produce.

    Parameters
    ----------
    n : int, default 1000
        Number of rows to simulate.
    random_seed : int or None, default 42
        Seed for the private random state.

    Returns
    -------
    pandas.DataFrame
        Columns ``instrument``, ``treatment``, ``X1``, ``X2``, ``outcome``,
        with ``n`` rows.
    """
    rng = np.random.RandomState(random_seed)

    # Instrument (Z): randomly assigned with probability 0.5.
    instrument = rng.binomial(1, 0.5, size=n)

    # Treatment (D): P(D=1) = 0.2 + 0.3 * Z, so the instrument shifts uptake.
    p_treatment = 0.2 + 0.3 * instrument
    treatment = rng.binomial(1, p_treatment, size=n)

    # Covariates that enter the outcome equation.
    X1 = rng.normal(0, 1, size=n)
    X2 = rng.normal(5, 2, size=n)

    # Outcome noise is drawn last to keep the same draw order as before.
    noise = rng.normal(0, 1, size=n)

    # Outcome (Y): true effect of treatment is +2.
    outcome = 10 + 2 * treatment + 0.5 * X1 + 0.7 * X2 + noise

    return pd.DataFrame({
        'instrument': instrument,
        'treatment': treatment,
        'X1': X1,
        'X2': X2,
        'outcome': outcome,
    })

# Build the simulated dataset with the default size and seed, then preview it.
df_iv = generate_iv_data(n=1000, random_seed=42)
df_iv.head(5)

Unnamed: 0,instrument,treatment,X1,X2,outcome
0,0,0,-0.877983,8.74193,16.02707
1,1,1,-0.82688,5.779228,18.143576
2,1,1,-0.226479,3.263415,12.331073
3,1,1,0.367366,6.069258,16.399883
4,0,1,0.913585,-0.271495,12.907288


In [3]:
# Naive OLS
import statsmodels.api as sm
from statsmodels.formula.api import ols

# Baseline: regress the outcome on treatment and both covariates,
# without using the instrument at all.
ols_model = ols(
    formula="outcome ~ treatment + X1 + X2",
    data=df_iv,
).fit()
print("Naive OLS results:", ols_model.summary(), sep="\n")

# Pull out the coefficient on the treatment indicator.
naive_effect = ols_model.params.loc["treatment"]
print(f"\nNaive OLS estimated treatment effect: {naive_effect:.3f}")

Naive OLS results:
                            OLS Regression Results                            
Dep. Variable:                outcome   R-squared:                       0.776
Model:                            OLS   Adj. R-squared:                  0.775
Method:                 Least Squares   F-statistic:                     1148.
Date:                Mon, 27 Jan 2025   Prob (F-statistic):          1.24e-322
Time:                        20:11:50   Log-Likelihood:                -1407.5
No. Observations:                1000   AIC:                             2823.
Df Residuals:                     996   BIC:                             2843.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      9.8855      0.086 

In [4]:
# 3. 2SLS Using the linearmodels package
# If not installed: pip install linearmodels
from linearmodels.iv import IV2SLS

# Formula layout: exogenous regressors sit outside the brackets, while
# [endogenous ~ instruments] goes inside them. Here `treatment` is the
# endogenous regressor and `instrument` is its instrument.
iv_model = IV2SLS.from_formula(
    formula='outcome ~ 1 + X1 + X2 [treatment ~ instrument]',
    data=df_iv,
).fit(cov_type='unadjusted')

# `summary` is an attribute on the fitted result, so it is not called.
print("\nIV (2SLS) results using linearmodels:", iv_model.summary, sep="\n")

# Pull out the 2SLS coefficient on the treatment indicator.
iv_effect = iv_model.params.loc["treatment"]
print(f"\n2SLS estimated treatment effect: {iv_effect:.3f}")


IV (2SLS) results using linearmodels:
                          IV-2SLS Estimation Summary                          
Dep. Variable:                outcome   R-squared:                      0.7751
Estimator:                    IV-2SLS   Adj. R-squared:                 0.7745
No. Observations:                1000   F-statistic:                    2550.4
Date:                Mon, Jan 27 2025   P-value (F-stat)                0.0000
Time:                        20:12:57   Distribution:                  chi2(3)
Cov. Estimator:            unadjusted                                         
                                                                              
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
Intercept      9.9194     0.1084     91.499     0.0000      9.7070      10.132
X1           