In [1]:
import random
import numpy as np
import pandas as pd
from sklearn.datasets import make_spd_matrix
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from numpy.random import normal, multivariate_normal
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
from dgp import DGP
from inference import Inference

In [2]:
def _hist_with_mean(ax, values, color, label):
    """Draw a histogram of ``values`` on ``ax`` and mark their mean with a dashed line."""
    ax.hist(values, color=color)
    ax.set_xlabel(label)
    ax.set_ylabel("Count")
    # axvline's ymin/ymax are fractions of the axes height (0..1), not counts,
    # so the defaults already draw the line across the full panel.
    ax.axvline(x=np.mean(values), color="k", linestyle="--")


def plot_hist(tau1, tau0):
    """Plot side-by-side histograms of two samples, each with its mean marked.

    Parameters
    ----------
    tau1 : array-like
        Values shown in the left panel (blue, x-axis labelled "tau1").
    tau0 : array-like
        Values shown in the right panel (red, x-axis labelled "tau0").

    Returns
    -------
    The figure object created by ``plt.subplots`` (one row, two columns,
    14 x 5 inches).
    """
    fig, (ax_1, ax_2) = plt.subplots(1, 2, figsize=(14, 5))
    _hist_with_mean(ax_1, tau1, "b", "tau1")
    _hist_with_mean(ax_2, tau0, "r", "tau0")
    return fig

def simulation(n, modelY='1', modelDA='1', ate=0, ntrials=1000):
    """Repeat the generate-then-infer experiment and collect the phi statistics.

    Every trial builds a fresh ``DGP(modelY, modelDA, n, tau=ate)`` and hands its
    ``Y``, ``D``, ``A``, ``tuple_idx`` and ``tau`` attributes to ``Inference``.

    Parameters
    ----------
    n : forwarded to ``DGP`` as its third positional argument.
    modelY, modelDA : model identifiers forwarded to ``DGP``; ``modelDA`` is
        also forwarded to ``Inference``.
    ate : forwarded to ``DGP`` as ``tau``.
    ntrials : number of independent trials to run.

    Returns
    -------
    Three arrays of length ``ntrials`` holding, per trial, ``phi_tau1``,
    ``phi_tau0`` and ``phi_theta`` of the ``Inference`` object. The calling
    cells average these and print them as coverage rates.
    """
    tau1_out = np.zeros(ntrials)
    tau0_out = np.zeros(ntrials)
    theta_out = np.zeros(ntrials)

    for trial in range(ntrials):
        data = DGP(modelY, modelDA, n, tau=ate)
        result = Inference(
            data.Y, data.D, data.A, modelDA,
            tuple_idx=data.tuple_idx, tau=data.tau,
        )
        tau1_out[trial] = result.phi_tau1
        tau0_out[trial] = result.phi_tau0
        theta_out[trial] = result.phi_theta

    return tau1_out, tau0_out, theta_out

In [3]:
# Coverage of the three phi statistics for outcome models 1-6 under the two
# assignment designs '9' and '10', first with ate=0 and then with ate=1.
# Each entry pairs a modelDA code with the label printed for it.
strata_reports = (
    ('9', "Coverage rate with 2 stratum for modelY={}: "),
    ('10', "Coverage rate with 4 stratum for modelY={}: "),
)

for ate in (0, 1):
    if ate == 1:
        print("Tau=1:")
    for model_y in range(1, 7):
        for design, template in strata_reports:
            phi_tau1s, phi_tau0s, phi_theta = simulation(
                1000, modelY=str(model_y), modelDA=design, ate=ate, ntrials=2000
            )
            print(template.format(model_y), np.mean(phi_tau1s), np.mean(phi_tau0s), np.mean(phi_theta))

Coverage rate for modelY=1:  0.9515 0.9415 0.944
Coverage rate under paired inference for modelY=1:  0.947 0.955 0.9495
Coverage rate for modelY=2:  0.9525 0.9575 0.953
Coverage rate under paired inference for modelY=2:  0.9435 0.9505 0.9415
Coverage rate for modelY=3:  0.947 0.9515 0.951
Coverage rate under paired inference for modelY=3:  0.953 0.944 0.95
Coverage rate for modelY=4:  0.9505 0.9445 0.942
Coverage rate under paired inference for modelY=4:  0.9535 0.945 0.954
Coverage rate for modelY=5:  0.948 0.937 0.951
Coverage rate under paired inference for modelY=5:  0.9375 0.94 0.9405
Coverage rate for modelY=6:  0.94 0.949 0.9475
Coverage rate under paired inference for modelY=6:  0.9445 0.9465 0.9425
Tau=1:
Coverage rate for modelY=1:  0.9595 0.9485 0.9455
Coverage rate under paired inference for modelY=1:  0.9505 0.9465 0.9475
Coverage rate for modelY=2:  0.955 0.949 0.9595
Coverage rate under paired inference for modelY=2:  0.9485 0.9435 0.9345
Coverage rate for modelY=3:  0.9

In [4]:
# Coverage of the three phi statistics for outcome models 1-6 under design '8'
# and its paired-inference variant '8p', first with ate=0 and then with ate=1.
# Each entry pairs a modelDA code with the label printed for it.
paired_reports = (
    ('8', "Coverage rate for modelY={}: "),
    ('8p', "Coverage rate under paired inference for modelY={}: "),
)

for ate in (0, 1):
    if ate == 1:
        print("Tau=1:")
    for model_y in range(1, 7):
        for design, template in paired_reports:
            phi_tau1s, phi_tau0s, phi_theta = simulation(
                1000, modelY=str(model_y), modelDA=design, ate=ate, ntrials=2000
            )
            print(template.format(model_y), np.mean(phi_tau1s), np.mean(phi_tau0s), np.mean(phi_theta))

KeyboardInterrupt: 

In [None]:
# Relative mean-squared-error table: for each outcome model 1-4, run the
# simulation under assignment designs 1-5 and print the MSE of theta, tau1 and
# tau0 normalised by the values obtained under the last design (modelDA='5').
#
# NOTE(review): simulation() in this notebook returns three arrays
# (phi_tau1, phi_tau0, phi_theta) that other cells average and report as
# coverage rates. This cell looks like it was written for an earlier version
# that returned two estimator arrays -- confirm that the quantities squared
# below are the intended ones.
for i in range(4):
    print("ModelY={}".format(i+1))
    mse = np.zeros((5,3))
    for j in range(5):
        # simulation() returns a 3-tuple; unpacking it into two names raises
        # ValueError, so the third array is received and discarded here.
        tau1s, tau0s, _ = simulation(1000, modelY=str(i+1), modelDA=str(j+1), ntrials=2000)
        theta = .5*tau1s + .5*tau0s
        mse[j,0] = np.mean(theta**2)
        mse[j,1] = np.mean(tau1s**2)
        mse[j,2] = np.mean(tau0s**2)
    # Row 4 is the last design; dividing broadcasts it across all five rows.
    mse = mse/mse[4]
    print(mse.T)
    print(" ")

ModelY=1


UnboundLocalError: local variable 'n' referenced before assignment