In [1]:
import random
import numpy as np
import pandas as pd
from sklearn.datasets import make_spd_matrix
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from numpy.random import normal, multivariate_normal
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
from dgp import DGP
from inference import Inference

In [2]:
def plot_hist(tau1, tau0):
    """Draw side-by-side histograms of two samples with their means marked.

    Parameters
    ----------
    tau1 : array-like
        Values shown in the left panel (blue, x-axis labelled "tau1").
    tau0 : array-like
        Values shown in the right panel (red, x-axis labelled "tau0").

    Returns
    -------
    matplotlib.figure.Figure
        The figure containing both panels.
    """
    fig, axes = plt.subplots(1, 2, figsize=(14, 5))
    panels = ((tau1, "tau1", "b"), (tau0, "tau0", "r"))
    for ax, (values, label, color) in zip(axes, panels):
        ax.hist(values, color=color)
        ax.set_xlabel(label)
        ax.set_ylabel("Count")
        # axvline's ymin/ymax are fractions of the axes height (0..1), not
        # data units, so a bound such as 3000 is out of range. The defaults
        # (0 and 1) already draw the mean marker across the whole panel.
        ax.axvline(x=np.mean(values), color="k", linestyle="--")
    return fig

def reject_prob(n, modelY='1', modelDA='1', ate=0, ntrials=1000):
    """Run ``ntrials`` simulated experiments and record ``inference()`` results.

    Every trial builds a fresh ``DGP(modelY, modelDA, n, tau=ate)``, passes its
    ``Y``, ``D``, ``A``, ``tuple_idx`` and ``tau`` to ``Inference`` and stores
    the three values returned by ``Inference.inference()``.

    Parameters
    ----------
    n : passed to ``DGP`` as its third positional argument
        (presumably the sample size -- confirm in dgp.py).
    modelY, modelDA : str
        Model identifiers forwarded to ``DGP``; ``modelDA`` is also forwarded
        to ``Inference``.
    ate : passed to ``DGP`` as ``tau``.
    ntrials : int
        Number of independent trials.

    Returns
    -------
    tuple of np.ndarray
        ``(phi_tau1s, phi_tau0s, phi_thetas)``, each of length ``ntrials``.
    """
    phi_tau1s = np.zeros(ntrials)
    phi_tau0s = np.zeros(ntrials)
    phi_thetas = np.zeros(ntrials)
    for trial in range(ntrials):
        sample = DGP(modelY, modelDA, n, tau=ate)
        estimator = Inference(
            sample.Y, sample.D, sample.A, modelDA,
            tuple_idx=sample.tuple_idx, tau=sample.tau,
        )
        # inference() yields its results in (tau1, tau0, theta) order.
        phi_tau1s[trial], phi_tau0s[trial], phi_thetas[trial] = estimator.inference()
    return phi_tau1s, phi_tau0s, phi_thetas

def risk(n, modelY='1', modelDA='1', ate=0, ntrials=1000):
    """Run ``ntrials`` simulated experiments and record the fitted quantities.

    Every trial builds a fresh ``DGP(modelY, modelDA, n, tau=ate)``, constructs
    an ``Inference`` object from it and reads that object's ``tau1``, ``tau0``
    and ``theta`` attributes.

    Parameters
    ----------
    n : passed to ``DGP`` as its third positional argument
        (presumably the sample size -- confirm in dgp.py).
    modelY, modelDA : str
        Model identifiers forwarded to ``DGP``; ``modelDA`` is also forwarded
        to ``Inference``.
    ate : passed to ``DGP`` as ``tau``.
    ntrials : int
        Number of independent trials.

    Returns
    -------
    tuple of np.ndarray
        ``(tau1s, tau0s, thetas)``, each of length ``ntrials``.
    """
    tau1s = np.zeros(ntrials)
    tau0s = np.zeros(ntrials)
    thetas = np.zeros(ntrials)
    for trial in range(ntrials):
        sample = DGP(modelY, modelDA, n, tau=ate)
        estimator = Inference(
            sample.Y, sample.D, sample.A, modelDA,
            tuple_idx=sample.tuple_idx, tau=sample.tau,
        )
        tau1s[trial] = estimator.tau1
        tau0s[trial] = estimator.tau0
        thetas[trial] = estimator.theta
    return tau1s, tau0s, thetas

def variance(n, modelY='1', modelDA='1', ate=0, ntrials=1000):
    """Run ``ntrials`` simulated experiments and record the ``var_*`` attributes.

    Every trial builds a fresh ``DGP(modelY, modelDA, n, tau=ate)``, constructs
    an ``Inference`` object, calls its ``inference()`` method and then reads
    ``var_tau1``, ``var_tau0`` and ``var_theta`` from it.

    Parameters
    ----------
    n : passed to ``DGP`` as its third positional argument
        (presumably the sample size -- confirm in dgp.py).
    modelY, modelDA : str
        Model identifiers forwarded to ``DGP``; ``modelDA`` is also forwarded
        to ``Inference``.
    ate : passed to ``DGP`` as ``tau``.
    ntrials : int
        Number of independent trials.

    Returns
    -------
    tuple of np.ndarray
        ``(var_tau1s, var_tau0s, var_thetas)``, each of length ``ntrials``.
    """
    var_tau1s = np.zeros(ntrials)
    var_tau0s = np.zeros(ntrials)
    var_thetas = np.zeros(ntrials)
    for trial in range(ntrials):
        sample = DGP(modelY, modelDA, n, tau=ate)
        estimator = Inference(
            sample.Y, sample.D, sample.A, modelDA,
            tuple_idx=sample.tuple_idx, tau=sample.tau,
        )
        # The returned values are not used here; the call is kept because it
        # runs before the var_* reads (presumably it populates them -- confirm
        # in inference.py).
        _tau1, _tau0, _theta = estimator.inference()
        var_tau1s[trial] = estimator.var_tau1
        var_tau0s[trial] = estimator.var_tau0
        var_thetas[trial] = estimator.var_theta
    return var_tau1s, var_tau0s, var_thetas

In [3]:
# Report MSE: for each modelY, print three LaTeX table rows (theta, tau1, tau0)
# with one column per modelDA '1'..'11', each scaled by the modelDA='8' value.
for y_idx in range(6):
    print("ModelY={}".format(y_idx+1))
    mse = np.zeros((11,3))
    for da_idx in range(11):
        tau1s, tau0s, thetas = risk(1000, modelY=str(y_idx+1), modelDA=str(da_idx+1), ntrials=2000)
        # Column order within a row: theta, tau1, tau0.
        mse[da_idx] = [np.mean(thetas**2), np.mean(tau1s**2), np.mean(tau0s**2)]
    # Row index 7 corresponds to modelDA='8'; it is the normalisation baseline.
    mse = (mse/mse[7]).T
    for row in mse:
        leading = "".join("{:.3f} & ".format(value) for value in row[:-1])
        print(leading + "{:.3f} \\\\".format(row[-1]))

In [None]:
# Report rejection probability with ate=0: for each modelY, print three LaTeX
# table rows (theta, tau1, tau0) with one column per selected modelDA.
for y_idx in range(6):
    print("ModelY={}".format(y_idx+1))
    prob = np.zeros((5,3))
    for col, design in enumerate(['1', '2', '8', '9', '10']):
        phi_tau1s, phi_tau0s, phi_theta = reject_prob(1000, modelY=str(y_idx+1), modelDA=design, ntrials=2000)
        # Column order within a row: theta, tau1, tau0.
        prob[col] = [np.mean(phi_theta), np.mean(phi_tau1s), np.mean(phi_tau0s)]
    prob = prob.T
    for row in prob:
        leading = "".join("{:.3f} & ".format(value) for value in row[:-1])
        print(leading + "{:.3f} \\\\".format(row[-1]))

ModelY=1
0.064 & 0.057 & 0.057 & 0.054 & 0.051 \\
0.053 & 0.061 & 0.055 & 0.055 & 0.046 \\
0.058 & 0.048 & 0.052 & 0.059 & 0.044 \\
ModelY=2
0.053 & 0.054 & 0.052 & 0.060 & 0.050 \\
0.054 & 0.048 & 0.052 & 0.055 & 0.051 \\
0.052 & 0.052 & 0.045 & 0.052 & 0.050 \\
ModelY=3
0.051 & 0.045 & 0.044 & 0.053 & 0.047 \\
0.052 & 0.052 & 0.049 & 0.055 & 0.048 \\
0.054 & 0.056 & 0.047 & 0.051 & 0.065 \\
ModelY=4
0.058 & 0.060 & 0.048 & 0.051 & 0.056 \\
0.050 & 0.058 & 0.051 & 0.052 & 0.059 \\
0.067 & 0.050 & 0.051 & 0.056 & 0.053 \\
ModelY=5
0.057 & 0.049 & 0.057 & 0.058 & 0.057 \\
0.052 & 0.050 & 0.047 & 0.061 & 0.060 \\
0.062 & 0.056 & 0.057 & 0.059 & 0.060 \\
ModelY=6
0.048 & 0.053 & 0.051 & 0.045 & 0.050 \\
0.052 & 0.057 & 0.057 & 0.050 & 0.051 \\
0.047 & 0.050 & 0.054 & 0.053 & 0.049 \\


In [None]:
# Report rejection probability with ate=0.03: for each modelY, print three
# LaTeX table rows (theta, tau1, tau0) with one column per selected modelDA.
for y_idx in range(6):
    print("ModelY={}".format(y_idx+1))
    prob = np.zeros((5,3))
    for col, design in enumerate(['1', '2', '8', '9', '10']):
        phi_tau1s, phi_tau0s, phi_theta = reject_prob(1000, modelY=str(y_idx+1), modelDA=design, ate=0.03, ntrials=2000)
        # Column order within a row: theta, tau1, tau0.
        prob[col] = [np.mean(phi_theta), np.mean(phi_tau1s), np.mean(phi_tau0s)]
    prob = prob.T
    for row in prob:
        leading = "".join("{:.3f} & ".format(value) for value in row[:-1])
        print(leading + "{:.3f} \\\\".format(row[-1]))

ModelY=1
0.343 & 0.333 & 0.997 & 0.782 & 0.964 \\
0.187 & 0.184 & 0.919 & 0.488 & 0.778 \\
0.197 & 0.188 & 0.915 & 0.488 & 0.790 \\
ModelY=2
0.356 & 0.344 & 0.997 & 0.781 & 0.974 \\
0.210 & 0.190 & 0.918 & 0.502 & 0.783 \\
0.204 & 0.198 & 0.930 & 0.479 & 0.780 \\
ModelY=3
0.357 & 0.351 & 0.998 & 0.814 & 0.971 \\
0.203 & 0.203 & 0.921 & 0.514 & 0.794 \\
0.194 & 0.211 & 0.915 & 0.521 & 0.784 \\
ModelY=4
0.259 & 0.237 & 0.750 & 0.496 & 0.682 \\
0.120 & 0.116 & 0.639 & 0.242 & 0.443 \\
0.326 & 0.303 & 0.632 & 0.482 & 0.603 \\
ModelY=5
0.059 & 0.059 & 0.069 & 0.071 & 0.068 \\
0.051 & 0.052 & 0.068 & 0.056 & 0.052 \\
0.052 & 0.062 & 0.067 & 0.055 & 0.062 \\
ModelY=6
0.357 & 0.346 & 0.997 & 0.771 & 0.967 \\
0.190 & 0.201 & 0.917 & 0.492 & 0.774 \\
0.203 & 0.196 & 0.922 & 0.473 & 0.772 \\


In [None]:
# Rejection probability for modelY='5' as the effect size grows: for each tau,
# print three LaTeX table rows (theta, tau1, tau0), one column per modelDA.
for tau in [0, 0.02, 0.05, 0.1, 0.2, 0.3]:
    print("tau={}".format(tau))
    prob = np.zeros((3,5))
    for col, design in enumerate(['1', '2', '8', '9', '10']):
        phi_tau1s, phi_tau0s, phi_theta = reject_prob(1000, modelY='5', modelDA=design, ate=tau, ntrials=2000)
        # Row order within a column: theta, tau1, tau0.
        prob[:, col] = [np.mean(phi_theta), np.mean(phi_tau1s), np.mean(phi_tau0s)]
    for row in prob:
        leading = "".join("{:.3f} & ".format(value) for value in row[:-1])
        print(leading + "{:.3f} \\\\".format(row[-1]))

tau=0
0.052 & 0.046 & 0.053 & 0.042 & 0.056 \\
0.050 & 0.052 & 0.048 & 0.052 & 0.053 \\
0.054 & 0.051 & 0.050 & 0.052 & 0.066 \\
tau=0.02
0.058 & 0.058 & 0.064 & 0.059 & 0.056 \\
0.052 & 0.059 & 0.053 & 0.059 & 0.052 \\
0.046 & 0.055 & 0.059 & 0.051 & 0.045 \\
tau=0.05
0.092 & 0.092 & 0.144 & 0.096 & 0.127 \\
0.064 & 0.061 & 0.123 & 0.074 & 0.078 \\
0.060 & 0.058 & 0.131 & 0.077 & 0.077 \\
tau=0.1
0.234 & 0.255 & 0.573 & 0.294 & 0.391 \\
0.127 & 0.123 & 0.492 & 0.140 & 0.215 \\
0.130 & 0.135 & 0.540 & 0.158 & 0.218 \\
tau=0.2
0.749 & 0.746 & 0.992 & 0.863 & 0.948 \\
0.432 & 0.432 & 0.980 & 0.538 & 0.722 \\
0.457 & 0.439 & 0.991 & 0.557 & 0.762 \\
tau=0.3
0.988 & 0.982 & 1.000 & 0.997 & 1.000 \\
0.802 & 0.804 & 1.000 & 0.915 & 0.983 \\
0.807 & 0.829 & 1.000 & 0.933 & 0.987 \\


In [10]:
# Compare modelDA '8' against '8p': for each modelY, print three LaTeX table
# rows (theta, tau1, tau0). Columns come in pairs ('8', '8p') for, in order:
# reject_prob at ate=0, reject_prob at ate=0.02, variance at ate=0 and
# variance at ate=0.02. Pairs are separated by an extra empty column ("& &").
for y_idx in range(6):
    print("ModelY={}".format(y_idx+1))
    res = np.zeros((2*4,3))
    settings = [(reject_prob, 0), (reject_prob, 0.02), (variance, 0), (variance, 0.02)]
    for pair_idx, (simulate, effect) in enumerate(settings):
        for offset, design in enumerate(['8', '8p']):
            out_tau1s, out_tau0s, out_thetas = simulate(1000, modelY=str(y_idx+1), modelDA=design, ate=effect, ntrials=2000)
            # Column order within a row: theta, tau1, tau0.
            res[2*pair_idx+offset] = [np.mean(out_thetas), np.mean(out_tau1s), np.mean(out_tau0s)]
    res = res.T
    for row in res:
        pieces = []
        for col, value in enumerate(row):
            if col == 7:
                template = "{:.4f} \\\\"
            elif col % 2 == 1:
                # End of a ('8', '8p') pair: leave an empty spacer column.
                template = "{:.4f} & & "
            else:
                template = "{:.4f} & "
            pieces.append(template.format(value))
        print("".join(pieces))

ModelY=1
0.0465 & 0.0460 & 0.8920 & 0.8970 & 0.0100 & 0.0100 & 0.0100 & 0.0100 \\
0.0550 & 0.0535 & 0.5895 & 0.6190 & 0.0200 & 0.0200 & 0.0200 & 0.0200 \\
0.0580 & 0.0465 & 0.6285 & 0.6200 & 0.0201 & 0.0200 & 0.0200 & 0.0200 \\
ModelY=2


KeyboardInterrupt: 