In [1]:
from dgp import DGP_Finite, DGP
from inference import Inference2, Inference
import numpy as np
import pandas as pd 

# generate multiple sample sizes, multiple model designs, multiple inference methods,
# a single estimator
# Smoke run: draw one dataset from DGP_Finite('1', 1000) and print what
# `inference` returns for each of the three methods, in the order
# 'mp', 'robust', 'clustered'.
dgp = DGP_Finite('1', 1000)
Yobs, D, A = dgp.get_data()
inf = Inference2(Yobs, D, A, dgp.cluster, dgp.tuple_idx, dgp.tau10)
print(*(inf.inference(method) for method in ('mp', 'robust', 'clustered')))

def cover_rate(sample_size=1000, modelY='1', ntrials=2000):
    """Average `Inference2` results over repeated draws from a fresh `DGP`.

    Every trial constructs a new ``DGP(modelY, '8', sample_size)``, pulls one
    dataset from it and builds an ``Inference2`` object with ``dgp.tau`` as
    the last argument. For each of the methods 'mp', 'robust' and
    'clustered' (in that order) it stores the value returned by
    ``inference(method)`` and ``se_tau10 * 1.96 * 2`` read right after
    that call.

    Parameters
    ----------
    sample_size : forwarded to ``DGP`` as its third argument.
    modelY : forwarded to ``DGP`` as its first argument.
    ntrials : number of simulated datasets.

    Returns
    -------
    (cover_mean, length_mean) : two length-3 arrays holding the per-method
        averages over trials, ordered 'mp', 'robust', 'clustered'.

    NOTE(review): `inference` presumably returns a 0/1 coverage indicator and
    refreshes `se_tau10` on each call -- confirm against `Inference2`.
    """
    methods = ('mp', 'robust', 'clustered')
    cover = np.zeros((ntrials, len(methods)))
    cf_length = np.zeros((ntrials, len(methods)))
    for trial in range(ntrials):
        dgp = DGP(modelY, '8', sample_size)
        Yobs, D, A = dgp.get_data()
        inf = Inference2(Yobs, D, A, dgp.cluster, dgp.tuple_idx, dgp.tau)
        for col, method in enumerate(methods):
            # `se_tau10` must be read after the matching `inference` call.
            cover[trial, col] = inf.inference(method)
            cf_length[trial, col] = inf.se_tau10*1.96*2
    return np.mean(cover, axis=0), np.mean(cf_length, axis=0)


def cover_rate_finite(sample_size=1000, modelY='1', ntrials=2000):
    """Average `Inference2` results over repeated draws from one `DGP_Finite`.

    A single ``DGP_Finite(modelY, sample_size)`` is created up front and
    ``get_data()`` is called on that same object once per trial. Each dataset
    is passed to ``Inference2`` with ``dgp.tau10`` as the last argument. For
    each of the methods 'mp', 'robust' and 'clustered' (in that order) the
    value returned by ``inference(method)`` and ``se_tau10 * 1.96 * 2`` read
    right after that call are stored.

    Parameters
    ----------
    sample_size : forwarded to ``DGP_Finite`` as its second argument.
    modelY : forwarded to ``DGP_Finite`` as its first argument.
    ntrials : number of ``get_data()`` draws.

    Returns
    -------
    (cover_mean, length_mean) : two length-3 arrays holding the per-method
        averages over trials, ordered 'mp', 'robust', 'clustered'.

    NOTE(review): reusing one `DGP_Finite` presumably keeps the population
    fixed while `get_data()` redraws the assignment -- confirm against
    `DGP_Finite`.
    """
    methods = ('mp', 'robust', 'clustered')
    cover = np.zeros((ntrials, len(methods)))
    cf_length = np.zeros((ntrials, len(methods)))
    dgp = DGP_Finite(modelY, sample_size)
    for trial in range(ntrials):
        Yobs, D, A = dgp.get_data()
        inf = Inference2(Yobs, D, A, dgp.cluster, dgp.tuple_idx, dgp.tau10)
        for col, method in enumerate(methods):
            # `se_tau10` must be read after the matching `inference` call.
            cover[trial, col] = inf.inference(method)
            cf_length[trial, col] = inf.se_tau10*1.96*2
    return np.mean(cover, axis=0), np.mean(cf_length, axis=0)

1 1 1


In [2]:
from joblib import Parallel, delayed
import multiprocessing

# Model labels '1' through '7'.
modelYs = [str(label) for label in range(1, 8)]

# Sample-size grid (the alternative grid [50, 100, 200, 500, 1000] is not used).
sample_sizes = [40, 80, 160, 480, 1000]

# Every (model, sample size) combination, with the model varying slowest.
qk_pairs = [(model, size) for model in modelYs for size in sample_sizes]
def processInput(qk):
    """Run `cover_rate_finite` for one (model label, sample size) pair.

    Returns the pair followed by the two arrays that `cover_rate_finite`
    produces, as the 4-tuple (model label, sample size, cover, cf).
    """
    model_label, n_units = qk
    cover_mean, length_mean = cover_rate_finite(n_units, model_label)
    return (model_label, n_units, cover_mean, length_mean)
# Evaluate every (model, sample size) pair in parallel, one job per CPU core.
num_cores = multiprocessing.cpu_count()
results = Parallel(n_jobs=num_cores)(delayed(processInput)(i) for i in qk_pairs)

# Result tables, one column per entry of `sample_sizes`:
#   cf_output -- 3 rows per model: the `cf` value of each inference method.
#   output    -- 6 rows per model, interleaved as (cover, cf) for the
#                first, second and third inference method.
cf_output = np.zeros((len(modelYs)*3, len(sample_sizes)))
output = np.zeros((len(modelYs)*3*2, len(sample_sizes)))
for (q, k, cover, cf) in results:
    # Row group comes from the label's position in `modelYs`, so the labels
    # do not have to be the consecutive integers '1'..'N'.
    i = modelYs.index(q)
    j = sample_sizes.index(k)
    cf_output[i*3:i*3+3, j] = cf
    out = [cover[0], cf[0], cover[1], cf[1], cover[2], cf[2]]
    output[i*6:i*6+6, j] = out
print(output)
pd.DataFrame(output).to_csv("finite_pop_cover_rate_cf.csv")

[[1.         1.         0.999      1.         1.        ]
 [0.81542143 0.45715812 0.13309215 0.04477511 0.02719025]
 [1.         1.         1.         1.         1.        ]
 [2.53355034 1.72143372 1.23120065 0.70872015 0.48874078]
 [1.         1.         1.         1.         1.        ]
 [2.49006966 1.72781725 1.20901385 0.70509456 0.48592166]
 [0.9035     0.9585     0.9575     0.953      0.957     ]
 [1.93398699 1.32782117 0.86369949 0.50307645 0.35863741]
 [0.9895     0.9975     0.9955     0.996      0.9955    ]
 [2.78769777 1.82609874 1.1583222  0.780402   0.52552813]
 [0.9155     0.9445     0.9425     0.954      0.9555    ]
 [2.06230278 1.24228445 0.81880018 0.50105382 0.3545332 ]
 [0.9755     0.9915     0.9825     0.993      0.9945    ]
 [2.1282457  1.88806363 1.2472513  0.66086087 0.46118482]
 [0.9955     0.999      0.997      0.9995     0.9995    ]
 [2.58735149 2.23462671 1.56360546 0.91766052 0.65406995]
 [0.9985     1.         0.999      1.         1.        ]
 [3.18353552 2

In [3]:
from joblib import Parallel, delayed
import multiprocessing

# Model labels '1' through '7'.
modelYs = [str(label) for label in range(1, 8)]

# Sample-size grid (the alternative grid [50, 100, 200, 500, 1000] is not used).
sample_sizes = [40, 80, 160, 480, 1000]

# Every (model, sample size) combination, with the model varying slowest.
qk_pairs = [(model, size) for model in modelYs for size in sample_sizes]
def processInput(qk):
    """Run `cover_rate` for one (model label, sample size) pair.

    Returns the pair followed by the two arrays that `cover_rate` produces,
    as the 4-tuple (model label, sample size, cover, cf).
    """
    model_label, n_units = qk
    cover_mean, length_mean = cover_rate(n_units, model_label)
    return (model_label, n_units, cover_mean, length_mean)
# Evaluate every (model, sample size) pair in parallel, one job per CPU core.
num_cores = multiprocessing.cpu_count()
results = Parallel(n_jobs=num_cores)(delayed(processInput)(i) for i in qk_pairs)

# Result tables, one column per entry of `sample_sizes`:
#   cf_output -- 3 rows per model: the `cf` value of each inference method.
#   output    -- 6 rows per model, interleaved as (cover, cf) for the
#                first, second and third inference method.
cf_output = np.zeros((len(modelYs)*3, len(sample_sizes)))
output = np.zeros((len(modelYs)*3*2, len(sample_sizes)))
for (q, k, cover, cf) in results:
    # Row group comes from the label's position in `modelYs`, so the labels
    # do not have to be the consecutive integers '1'..'N'.
    i = modelYs.index(q)
    j = sample_sizes.index(k)
    cf_output[i*3:i*3+3, j] = cf
    out = [cover[0], cf[0], cover[1], cf[1], cover[2], cf[2]]
    output[i*6:i*6+6, j] = out
print(output)
pd.DataFrame(output).to_csv("sup_pop_cover_rate_cf.csv")

[[0.944      0.9445     0.944      0.952      0.954     ]
 [1.80234199 1.25015643 0.8801234  0.50566134 0.35030665]
 [0.985      0.993      0.9945     0.9955     0.9945    ]
 [2.36772461 1.72125886 1.22341252 0.71236333 0.49473946]
 [0.9375     0.9475     0.943      0.956      0.955     ]
 [1.81637864 1.26267061 0.88171985 0.50672453 0.35131238]
 [0.9465     0.9385     0.9495     0.941      0.9565    ]
 [1.90526345 1.28742638 0.89414025 0.5109643  0.3521738 ]
 [0.986      0.992      0.9945     0.994      0.996     ]
 [2.49224809 1.80444447 1.29116266 0.75203725 0.52144933]
 [0.9305     0.941      0.9485     0.942      0.9545    ]
 [1.86334026 1.27420438 0.89051532 0.50971501 0.35151337]
 [0.939      0.9525     0.9505     0.9455     0.9525    ]
 [2.47552641 1.7114374  1.19047982 0.67942363 0.46893078]
 [0.979      0.989      0.9915     0.992      0.995     ]
 [3.05600939 2.2309781  1.59147415 0.9251927  0.64030998]
 [0.9915     0.9925     0.997      0.998      0.9975    ]
 [3.72104474 2