In [1]:
import random
import numpy as np
import pandas as pd
from sklearn.datasets import make_spd_matrix
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from numpy.random import normal, multivariate_normal
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
from dgp import DGP
from inference import Inference

In [2]:
def plot_hist(tau1, tau0):
    """Draw side-by-side histograms of two samples, each with its mean marked.

    Parameters
    ----------
    tau1, tau0 : array-like
        Samples to histogram. ``tau1`` is drawn in the left panel (blue) and
        ``tau0`` in the right panel (red).

    Returns
    -------
    The figure object returned by ``plt.subplots`` that holds both panels.
    """
    fig, (ax_1, ax_2) = plt.subplots(1, 2, figsize=(14, 5))
    panels = ((ax_1, tau1, "tau1", "b"), (ax_2, tau0, "tau0", "r"))
    for ax, sample, label, color in panels:
        ax.hist(sample, color=color)
        ax.set_xlabel(label)
        ax.set_ylabel("Count")
        # axvline's ymin/ymax are fractions of the axes height in [0, 1], not
        # data units, so the old ymax=3000 was out of range. The defaults
        # already span the full height of the panel.
        ax.axvline(x=np.mean(sample), color="k", linestyle="--")
    return fig

def reject_prob(n, modelY='1', modelDA='1', ate=0, ntrials=1000):
    """Simulate ``ntrials`` experiments and collect the outcome of each test.

    Each trial builds a fresh ``DGP(modelY, modelDA, n, tau=ate)``, feeds its
    ``Y``, ``D``, ``A``, ``tuple_idx`` and ``tau`` to ``Inference`` and stores
    the five values returned by ``Inference.inference()``.

    Returns
    -------
    tuple of five 1-D arrays of length ``ntrials``, ordered
    (tau11, tau10, theta1, theta2, theta12). The notebook averages these to
    report rejection probabilities.
    """
    outcomes = tuple(np.zeros(ntrials) for _ in range(5))
    for trial in range(ntrials):
        sample = DGP(modelY, modelDA, n, tau=ate)
        tester = Inference(
            sample.Y,
            sample.D,
            sample.A,
            modelDA,
            tuple_idx=sample.tuple_idx,
            tau=sample.tau,
        )
        # Explicit five-way unpacking so an unexpected result length still fails loudly.
        phi_tau11, phi_tau10, phi_theta1, phi_theta2, phi_theta12 = tester.inference()
        decisions = (phi_tau11, phi_tau10, phi_theta1, phi_theta2, phi_theta12)
        for store, decision in zip(outcomes, decisions):
            store[trial] = decision
    return outcomes

def risk(n, modelY='1', modelDA='1', ate=0, ntrials=1000):
    """Simulate ``ntrials`` experiments and collect the five estimates of each.

    Each trial builds a fresh ``DGP(modelY, modelDA, n, tau=ate)``, constructs
    an ``Inference`` object from it and reads the attributes ``tau11``,
    ``tau10``, ``theta1``, ``theta2`` and ``theta12`` straight after
    construction (``inference()`` is not called here).

    Returns
    -------
    tuple of five 1-D arrays of length ``ntrials``, ordered
    (tau11, tau10, theta1, theta2, theta12).
    """
    fields = ("tau11", "tau10", "theta1", "theta2", "theta12")
    draws = tuple(np.zeros(ntrials) for _ in fields)
    for trial in range(ntrials):
        sample = DGP(modelY, modelDA, n, tau=ate)
        fitted = Inference(
            sample.Y,
            sample.D,
            sample.A,
            modelDA,
            tuple_idx=sample.tuple_idx,
            tau=sample.tau,
        )
        for store, field in zip(draws, fields):
            store[trial] = getattr(fitted, field)
    return draws

def variance(n, modelY='1', modelDA='1', ate=0, ntrials=1000):
    """Simulate ``ntrials`` experiments and collect the five variance attributes.

    Each trial builds a fresh ``DGP(modelY, modelDA, n, tau=ate)``, constructs
    an ``Inference`` object, calls ``inference()`` (its return value is
    discarded) and then reads ``var_tau11``, ``var_tau10``, ``var_theta1``,
    ``var_theta2`` and ``var_theta12`` from the object.

    Returns
    -------
    tuple of five 1-D arrays of length ``ntrials``, ordered
    (var_tau11, var_tau10, var_theta1, var_theta2, var_theta12).
    """
    fields = ("var_tau11", "var_tau10", "var_theta1", "var_theta2", "var_theta12")
    draws = tuple(np.zeros(ntrials) for _ in fields)
    for trial in range(ntrials):
        sample = DGP(modelY, modelDA, n, tau=ate)
        fitted = Inference(
            sample.Y,
            sample.D,
            sample.A,
            modelDA,
            tuple_idx=sample.tuple_idx,
            tau=sample.tau,
        )
        # Run the tests first; the var_* attributes are read only after this call.
        fitted.inference()
        for store, field in zip(draws, fields):
            store[trial] = getattr(fitted, field)
    return draws

In [3]:
# Exploratory check: element-wise ratios of the modelDA='11' draws to the
# modelDA='1' draws, both with modelY='1'.
# NOTE(review): the two risk() calls are separate simulation runs, so these
# ratios presumably pair unrelated trials — confirm this is the intended comparison.
tau11s, tau10s, theta1s, theta2s, theta12s = risk(1000, modelY=str(1), modelDA=str(1), ntrials=2000)
tau11s_re, tau10s_re, theta1s_re, theta2s_re, theta12s_re = risk(1000, modelY=str(1), modelDA=str(11), ntrials=2000)
print(*(
    rerun / base
    for rerun, base in zip(
        (tau11s_re, tau10s_re, theta1s_re, theta2s_re, theta12s_re),
        (tau11s, tau10s, theta1s, theta2s, theta12s),
    )
))

# report MSE
# One LaTeX table body per outcome model: rows are (theta1, theta2, theta12,
# tau11, tau10), columns are modelDA '1'..'11'.
for model_y in range(1, 7):
    print("ModelY={}".format(model_y))
    mse = np.zeros((11, 5))
    for design in range(1, 12):
        tau11s, tau10s, theta1s, theta2s, theta12s = risk(1000, modelY=str(model_y), modelDA=str(design), ntrials=2000)
        mse[design - 1] = [
            np.mean(theta1s**2),
            np.mean(theta2s**2),
            np.mean(theta12s**2),
            np.mean(tau11s**2),
            np.mean(tau10s**2),
        ]
    # Scale every design by row 7, i.e. modelDA='8', so that design reads 1.000.
    mse = mse/mse[7]
    mse = mse.T
    for row in mse:
        *leading, last = row
        line = "".join("{:.3f} & ".format(value) for value in leading)
        print(line + "{:.3f} \\\\".format(last))

ModelY=1


KeyboardInterrupt: 

In [4]:
# report Reject Probability with ate=0
# One LaTeX table body per outcome model: rows are (theta1, theta2, theta12,
# tau11, tau10), columns are the designs listed below.
for model_y in range(1, 7):
    print("ModelY={}".format(model_y))
    prob = np.zeros((5, 5))
    for slot, design in enumerate(['1', '2', '8', '9', '10']):
        phi_tau11s, phi_tau10s, phi_theta1s, phi_theta2s, phi_theta12s = reject_prob(1000, modelY=str(model_y), modelDA=design, ntrials=2000)
        prob[slot] = [
            np.mean(phi_theta1s),
            np.mean(phi_theta2s),
            np.mean(phi_theta12s),
            np.mean(phi_tau11s),
            np.mean(phi_tau10s),
        ]
    # Filled design-by-estimand; transpose so each printed row is one estimand.
    prob = prob.T
    for row in prob:
        *leading, last = row
        line = "".join("{:.3f} & ".format(value) for value in leading)
        print(line + "{:.3f} \\\\".format(last))

ModelY=1
0.046 & 0.045 & 0.053 & 0.049 & 0.056 \\
0.054 & 0.053 & 0.057 & 0.050 & 0.065 \\
0.041 & 0.047 & 0.038 & 0.045 & 0.044 \\
0.053 & 0.043 & 0.044 & 0.045 & 0.046 \\
0.043 & 0.043 & 0.052 & 0.052 & 0.054 \\
ModelY=2
0.042 & 0.054 & 0.044 & 0.049 & 0.051 \\
0.043 & 0.055 & 0.043 & 0.054 & 0.054 \\
0.051 & 0.050 & 0.057 & 0.047 & 0.054 \\
0.051 & 0.056 & 0.055 & 0.059 & 0.050 \\
0.045 & 0.057 & 0.043 & 0.044 & 0.045 \\
ModelY=3
0.054 & 0.050 & 0.047 & 0.054 & 0.057 \\
0.050 & 0.047 & 0.041 & 0.043 & 0.051 \\
0.056 & 0.050 & 0.048 & 0.047 & 0.051 \\
0.052 & 0.051 & 0.047 & 0.053 & 0.059 \\
0.051 & 0.058 & 0.044 & 0.052 & 0.051 \\
ModelY=4
0.050 & 0.053 & 0.054 & 0.059 & 0.062 \\
0.044 & 0.043 & 0.052 & 0.055 & 0.051 \\
0.043 & 0.054 & 0.049 & 0.045 & 0.047 \\
0.051 & 0.053 & 0.041 & 0.061 & 0.060 \\
0.048 & 0.065 & 0.056 & 0.048 & 0.049 \\
ModelY=5
0.046 & 0.051 & 0.028 & 0.050 & 0.038 \\
0.051 & 0.045 & 0.045 & 0.047 & 0.048 \\
0.045 & 0.051 & 0.026 & 0.054 & 0.051 \\
0.045 & 0.05

In [7]:
# report Reject Probability with ate=0.05
# One LaTeX table body per outcome model: rows are (theta1, theta2, theta12,
# tau11, tau10), columns are the designs listed below.
for model_y in range(1, 7):
    print("ModelY={}".format(model_y))
    prob = np.zeros((5, 5))
    for slot, design in enumerate(['1', '2', '8', '9', '10']):
        phi_tau11s, phi_tau10s, phi_theta1s, phi_theta2s, phi_theta12s = reject_prob(1000, modelY=str(model_y), modelDA=design, ate=0.05, ntrials=2000)
        prob[slot] = [
            np.mean(phi_theta1s),
            np.mean(phi_theta2s),
            np.mean(phi_theta12s),
            np.mean(phi_tau11s),
            np.mean(phi_tau10s),
        ]
    # Filled design-by-estimand; transpose so each printed row is one estimand.
    prob = prob.T
    for row in prob:
        *leading, last = row
        line = "".join("{:.3f} & ".format(value) for value in leading)
        print(line + "{:.3f} \\\\".format(last))

ModelY=1
0.895 & 0.902 & 1.000 & 1.000 & 1.000 \\
0.501 & 0.502 & 1.000 & 0.920 & 0.997 \\
0.099 & 0.112 & 0.488 & 0.214 & 0.371 \\
0.766 & 0.791 & 1.000 & 0.997 & 1.000 \\
0.452 & 0.432 & 1.000 & 0.890 & 0.996 \\
ModelY=2
0.601 & 0.602 & 0.965 & 0.866 & 0.921 \\
0.252 & 0.265 & 0.411 & 0.364 & 0.408 \\
0.072 & 0.073 & 0.255 & 0.116 & 0.186 \\
0.440 & 0.433 & 1.000 & 0.888 & 0.996 \\
0.262 & 0.259 & 0.533 & 0.405 & 0.493 \\
ModelY=3
0.676 & 0.667 & 0.981 & 0.920 & 0.967 \\
0.298 & 0.314 & 0.483 & 0.423 & 0.463 \\
0.094 & 0.076 & 0.269 & 0.133 & 0.220 \\
0.517 & 0.506 & 1.000 & 0.949 & 1.000 \\
0.278 & 0.292 & 0.593 & 0.492 & 0.561 \\
ModelY=4
0.417 & 0.434 & 0.979 & 0.791 & 0.942 \\
0.189 & 0.190 & 0.461 & 0.314 & 0.425 \\
0.061 & 0.069 & 0.278 & 0.099 & 0.147 \\
0.222 & 0.221 & 1.000 & 0.625 & 0.973 \\
0.330 & 0.313 & 0.565 & 0.464 & 0.540 \\
ModelY=5
0.102 & 0.099 & 0.611 & 0.101 & 0.146 \\
0.068 & 0.064 & 0.188 & 0.077 & 0.077 \\
0.053 & 0.059 & 0.051 & 0.056 & 0.052 \\
0.086 & 0.08

In [8]:
# Rejection probabilities for modelY='5' over a grid of treatment effects.
# The table is printed without a transpose: each row is one design from the
# list below, each column one of (theta1, theta2, theta12, tau11, tau10).
for tau in [0, 0.02, 0.05, 0.1, 0.2, 0.3]:
    print("tau={}".format(tau))
    prob = np.zeros((5, 5))
    for slot, design in enumerate(['1', '2', '8', '9', '10']):
        phi_tau11s, phi_tau10s, phi_theta1s, phi_theta2s, phi_theta12s = reject_prob(1000, modelY='5', modelDA=design, ate=tau, ntrials=2000)
        prob[slot] = [
            np.mean(phi_theta1s),
            np.mean(phi_theta2s),
            np.mean(phi_theta12s),
            np.mean(phi_tau11s),
            np.mean(phi_tau10s),
        ]
    for row in prob:
        *leading, last = row
        line = "".join("{:.3f} & ".format(value) for value in leading)
        print(line + "{:.3f} \\\\".format(last))

tau=0
0.064 & 0.058 & 0.056 & 0.058 & 0.052 \\
0.053 & 0.050 & 0.046 & 0.050 & 0.048 \\
0.035 & 0.033 & 0.029 & 0.026 & 0.032 \\
0.053 & 0.055 & 0.053 & 0.054 & 0.055 \\
0.065 & 0.053 & 0.051 & 0.061 & 0.059 \\
tau=0.02
0.059 & 0.046 & 0.048 & 0.047 & 0.051 \\
0.054 & 0.055 & 0.050 & 0.051 & 0.048 \\
0.134 & 0.062 & 0.030 & 0.114 & 0.070 \\
0.057 & 0.057 & 0.046 & 0.051 & 0.045 \\
0.057 & 0.053 & 0.058 & 0.056 & 0.059 \\
tau=0.05
0.099 & 0.064 & 0.054 & 0.090 & 0.056 \\
0.106 & 0.054 & 0.053 & 0.099 & 0.076 \\
0.615 & 0.189 & 0.050 & 0.547 & 0.219 \\
0.101 & 0.066 & 0.044 & 0.077 & 0.073 \\
0.127 & 0.079 & 0.065 & 0.116 & 0.090 \\
tau=0.1
0.244 & 0.120 & 0.061 & 0.190 & 0.111 \\
0.242 & 0.117 & 0.065 & 0.204 & 0.114 \\
0.993 & 0.622 & 0.133 & 0.976 & 0.689 \\
0.259 & 0.125 & 0.062 & 0.207 & 0.127 \\
0.359 & 0.167 & 0.059 & 0.285 & 0.143 \\
tau=0.2
0.712 & 0.345 & 0.076 & 0.554 & 0.320 \\
0.714 & 0.337 & 0.079 & 0.553 & 0.311 \\
1.000 & 0.994 & 0.453 & 1.000 & 0.995 \\
0.745 & 0.350 & 0

In [12]:
# Compare modelDA '8' against '8p' for every outcome model.
# Before the transpose, `res` holds one row per (quantity, ate, design):
#   rows 0-1: mean of reject_prob outputs, ate=0
#   rows 2-3: mean of reject_prob outputs, ate=0.05
#   rows 4-5: mean of variance outputs,    ate=0
#   rows 6-7: mean of variance outputs,    ate=0.05
# Within each pair the first row is '8' and the second is '8p'.
for model_y in range(1, 7):
    print("ModelY={}".format(model_y))
    res = np.zeros((2*4, 5))
    scenarios = ((reject_prob, 0), (reject_prob, 0.05), (variance, 0), (variance, 0.05))
    for group, (simulate, ate) in enumerate(scenarios):
        for offset, design in enumerate(['8', '8p']):
            # The phi_* names are reused for the variance runs as well.
            phi_tau11s, phi_tau10s, phi_theta1s, phi_theta2s, phi_theta12s = simulate(1000, modelY=str(model_y), modelDA=design, ate=ate, ntrials=2000)
            res[2*group + offset] = [
                np.mean(phi_theta1s),
                np.mean(phi_theta2s),
                np.mean(phi_theta12s),
                np.mean(phi_tau11s),
                np.mean(phi_tau10s),
            ]
    res = res.T
    for row in res:
        *leading, last = row
        # An extra "&" after every second entry leaves an empty LaTeX column
        # between the '8'/'8p' pairs.
        line = "".join(
            ("{:.4f} & & " if position % 2 == 1 else "{:.4f} & ").format(value)
            for position, value in enumerate(leading)
        )
        print(line + "{:.4f} \\\\".format(last))

ModelY=1
0.0500 & 0.0525 & & 1.0000 & 1.0000 & & 0.0100 & 0.0100 & & 0.0100 & 0.0100 \\
0.0555 & 0.0505 & & 1.0000 & 1.0000 & & 0.0100 & 0.0100 & & 0.0100 & 0.0100 \\
0.0590 & 0.0530 & & 0.5055 & 0.4910 & & 0.0100 & 0.0100 & & 0.0100 & 0.0100 \\
0.0620 & 0.0550 & & 1.0000 & 1.0000 & & 0.0200 & 0.0200 & & 0.0199 & 0.0200 \\
0.0530 & 0.0545 & & 1.0000 & 0.9995 & & 0.0200 & 0.0201 & & 0.0200 & 0.0200 \\
ModelY=2
0.0480 & 0.0565 & & 0.9450 & 0.9490 & & 0.0738 & 0.0737 & & 0.0738 & 0.0738 \\
0.0510 & 0.0590 & & 0.4060 & 0.4050 & & 0.1153 & 0.1154 & & 0.1153 & 0.1153 \\
0.0485 & 0.0480 & & 0.2715 & 0.2850 & & 0.0217 & 0.0217 & & 0.0217 & 0.0217 \\
0.0525 & 0.0635 & & 1.0000 & 1.0000 & & 0.0409 & 0.0408 & & 0.0409 & 0.0409 \\
0.0535 & 0.0555 & & 0.5165 & 0.5380 & & 0.1500 & 0.1500 & & 0.1501 & 0.1501 \\
ModelY=3
0.0475 & 0.0415 & & 0.9750 & 0.9730 & & 0.0614 & 0.0614 & & 0.0613 & 0.0612 \\
0.0485 & 0.0435 & & 0.4635 & 0.4650 & & 0.0974 & 0.0974 & & 0.0973 & 0.0972 \\
0.0535 & 0.0400 & & 0.262