In [1]:
import pandas as pd
import numpy as np
import math

import statsmodels.api as sm
from statsmodels.sandbox.regression.gmm import IV2SLS

### Part 1) - Part 2)

In [92]:
sigma = .1
var = [[1, sigma ], [sigma , 1]]
mean = [0., 0.]
N = int(1e6)

ey, ex = np.random.multivariate_normal(mean, var, size = N).transpose()

In [93]:
pz = .3

z = np.random.binomial(1,pz,N).astype(np.float)

In [94]:
beta_0x = -.5
beta_1x = 2

x = (beta_0x + beta_1x*z + ex > 0).astype(np.float) 

In [47]:
beta_0y = 1
beta_1y = 1

y = beta_0y + beta_1y*x + ey

### Part 5

$E(y(1) -y(0)) = E(\beta_{0y} + \beta_{1jy} + \epsilon_{yj} |x=1 ) - E(\beta_{0y} + \epsilon_{yj} | x_j=0 ) = E(\beta_{1jy} |x_j =1) = \beta_{1y} + E(\epsilon_{yj}|x_j = 1) -E(\epsilon_{yj}|x_j=0) $

In [60]:
y1 = (y*x).sum()/x.sum()
y0 =  (y*(1-x)).sum()/(1-x).sum()

y1 - y0

1.116165139029349

### Part 6

* What happens to the OLS estimator applied to this data?
It does not recover the average treatment effect of $X$ on $Y$
* Is that expected or unexpected?
It is biased upward. This is expected because $X$ is correlated with the error term

### Part 7

Using the IV estimator is much closer

In [63]:
y1 = (y*z).sum()/z.sum()
y0 =  (y*(1-z)).sum()/(1-z).sum()

x1 = (x*z).sum()/z.sum()
x0 =  (x*(1-z)).sum()/(1-z).sum()

print (y1 - y0)/(x1 - x0)


1.0016771452554731


### Part 8 


* If $x_i = 1$, and $z_i = 1$ (compliers) then $\epsilon_{xj} > -\beta_{0x} - \beta_{1x}$
* If $x_i = 0$, and $z_i = 1$ (never takers) then $\epsilon_{xj} \leq -\beta_{0x} - \beta_{1x}$
* If $x_i = 1$, and $z_i = 0$ (defiers) then $\epsilon_{xj} > -\beta_{0x} $
* If $x_i = 0$, and $z_i = 0$ (never takers) then $\epsilon_{xj} \leq -\beta_{0x} $


### Part 9 

Above we can see that defiers have $\epsilon_{xj} \leq -\beta_{0x} + \beta_{1x}$
When  $min(\epsilon_{xj}) > -\beta_{0x} + \beta_{1x}$, we have no defiers

### Part 10

In [80]:
c = (x*z).sum()/N 
print 'compliers: %s'%c

d = ((1-x)*z).sum()/N
print 'never takers: %s'%d

a =   (x*(1-z)).sum()/N
print 'defiers: %s'%a

n= ((1-x)*(1-z)).sum()/N
print 'never takers: %s'%n


compliers: 0.280032
never takers: 0.020027
defiers: 0.216182
never takers: 0.483759


### Part 11

In [89]:
c = ( ex > - beta_0x -  beta_1x ).astype(np.float) 

x1c = x*c #x is 1 and a complier
x0c = (1-x)*c #x is 0 and a complier


y1c = (y*x1c).sum()/x1c.sum()
y0c =  (y*x0c).sum()/x0c.sum()

print y1c - y0c

1.095965832826491


### Part 12 

The average treatment effect is the LATE. The IV estimate is not the LATE

### Part 13

Since $\beta_{1jy} =1$ for all $j$, the treatment effect and the local average treatment effect should be the same. Everyone has the same treatment effect essentially. As a result, it is unclear how to interpret the IV estimate

### Part 14

In [97]:
beta_1jy = np.random.normal(1, 1, size = N)

yj = beta_0y + beta_1jy*x + ey

### Part 15


$E(y(1) -y(0)) = E(\beta_{0y} + \beta_{1jy} + \epsilon_{yj} |x=1 ) - E(\beta_{0y} + \epsilon_{yj} | x_j=0 ) =  E(\beta_{1jy} + \epsilon_{yj}|x_j = 1) -E(\epsilon_{yj}|x_j=0) $

### Part 16

Yes it does, this is expected because the individual coefficient was independent from the data

In [98]:
yj1 = (yj*x).sum()/x.sum()
yj0 =  (yj*(1-x)).sum()/(1-x).sum()

print yj1 - yj0

1.1159159518296389


### Part 17

In [99]:
yj1 = (yj*z).sum()/z.sum()
yj0 =  (yj*(1-z)).sum()/(1-z).sum()

x1 = (x*z).sum()/z.sum()
x0 =  (x*(1-z)).sum()/(1-z).sum()

print (yj1 - yj0)/(x1 - x0)

1.0059753143335015


### Part 18

They do not

In [100]:
yj1c = (yj*x1c).sum()/x1c.sum()
yj0c =  (yj*x0c).sum()/x0c.sum()

print yj1c - yj0c

0.002870177316993461
