In [1]:
from dgp import DGP_Finite, DGP
from inference import Inference2, Inference
import numpy as np
import pandas as pd 

# Generate results for multiple sample sizes, multiple model designs, and
# multiple inference methods, all using a single estimator.
# Smoke test: one finite-population draw, printing the value returned by each
# inference method on a single line.
dgp = DGP_Finite('1', 1000)
Yobs, D, A = dgp.get_data()
inf = Inference2(Yobs, D, A, dgp.cluster, dgp.tuple_idx, dgp.tau10)
print(*[inf.inference(method) for method in ('mp', 'robust', 'clustered')])

def cover_rate(sample_size=1000, modelY='1', ntrials=2000,
               methods=('mp', 'robust', 'clustered')):
    """Simulate coverage and mean interval length, redrawing the DGP each trial.

    Every trial builds a fresh ``DGP(modelY, '8', sample_size)``, draws data
    with ``get_data()`` and runs ``Inference2`` against ``dgp.tau`` once per
    entry of ``methods``.

    Parameters
    ----------
    sample_size : int
        Forwarded to ``DGP`` as its third argument.
    modelY : str
        Forwarded to ``DGP`` as its first argument.
    ntrials : int
        Number of Monte Carlo repetitions.
    methods : sequence of str
        Method names handed to ``Inference2.inference``; one output column
        per entry, in this order.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Two arrays of length ``len(methods)``: the per-method mean of the
        values returned by ``inference(method)`` (presumably 0/1 coverage
        indicators -- confirm in inference.py) and the per-method mean of
        ``2 * 1.96 * inf.se_tau10``.
    """
    # NOTE(review): the second DGP argument '8' is a fixed design code whose
    # meaning lives in dgp.py -- not visible from this notebook.
    n_methods = len(methods)
    cover = np.zeros((ntrials, n_methods))
    cf_length = np.zeros((ntrials, n_methods))
    for i in range(ntrials):
        dgp = DGP(modelY, '8', sample_size)
        Yobs, D, A = dgp.get_data()
        inf = Inference2(Yobs, D, A, dgp.cluster, dgp.tuple_idx, dgp.tau)
        for col, method in enumerate(methods):
            cover[i, col] = inf.inference(method)
            # se_tau10 is read right after the call; this assumes inference()
            # refreshes it for the requested method -- TODO confirm.
            cf_length[i, col] = inf.se_tau10 * 1.96 * 2
    return np.mean(cover, axis=0), np.mean(cf_length, axis=0)


def cover_rate_finite(sample_size=1000, modelY='1', ntrials=2000,
                      methods=('mp', 'robust', 'clustered')):
    """Simulate coverage and mean interval length for one fixed DGP_Finite.

    A single ``DGP_Finite(modelY, sample_size)`` is built before the loop and
    reused; each trial only calls ``get_data()`` on it again and runs
    ``Inference2`` against ``dgp.tau10`` once per entry of ``methods``.

    Parameters
    ----------
    sample_size : int
        Forwarded to ``DGP_Finite`` as its second argument.
    modelY : str
        Forwarded to ``DGP_Finite`` as its first argument.
    ntrials : int
        Number of Monte Carlo repetitions.
    methods : sequence of str
        Method names handed to ``Inference2.inference``; one output column
        per entry, in this order.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Two arrays of length ``len(methods)``: the per-method mean of the
        values returned by ``inference(method)`` (presumably 0/1 coverage
        indicators -- confirm in inference.py) and the per-method mean of
        ``2 * 1.96 * inf.se_tau10``.
    """
    n_methods = len(methods)
    cover = np.zeros((ntrials, n_methods))
    cf_length = np.zeros((ntrials, n_methods))
    # Built once on purpose: the same dgp object serves every trial.
    dgp = DGP_Finite(modelY, sample_size)
    for i in range(ntrials):
        Yobs, D, A = dgp.get_data()
        inf = Inference2(Yobs, D, A, dgp.cluster, dgp.tuple_idx, dgp.tau10)
        for col, method in enumerate(methods):
            cover[i, col] = inf.inference(method)
            # se_tau10 is read right after the call; this assumes inference()
            # refreshes it for the requested method -- TODO confirm.
            cf_length[i, col] = inf.se_tau10 * 1.96 * 2
    return np.mean(cover, axis=0), np.mean(cf_length, axis=0)

[ 0.28511803 -2.94985339 -0.79622811  1.30075908 -0.37740243 -0.45671649
  1.12291558 -1.36328926 -0.06727684  0.92025191]
1 1 1


In [2]:
from joblib import Parallel, delayed
import multiprocessing

# Simulation grid for the finite-population experiment.
modelYs = ['1', '2', '3', '4', '5', '6']

# An earlier grid [50, 100, 200, 500, 1000] was assigned here and immediately
# overwritten; only the grid below was ever in effect.
# NOTE(review): presumably these sizes were chosen to divide evenly into the
# design's tuples -- confirm against dgp.py.
sample_sizes = [40, 80, 160, 480, 1000]

# One (model, sample size) job per grid point, models varying slowest.
qk_pairs = [(q, k) for q in modelYs for k in sample_sizes]
def processInput(qk):
    """Run the finite-population simulation for one (model, sample size) pair.

    ``qk`` is a 2-tuple ``(model, size)``. Returns ``(model, size, cover, cf)``
    where ``cover`` and ``cf`` are the two values returned by
    ``cover_rate_finite(size, model)``.
    """
    model, size = qk
    coverage, ci_length = cover_rate_finite(size, model)
    return model, size, coverage, ci_length
# Run every grid point in parallel, one worker per CPU core.
num_cores = multiprocessing.cpu_count()
results = Parallel(n_jobs=num_cores)(delayed(processInput)(i) for i in qk_pairs)

# cf_output: 3 rows per model (one per inference method), interval lengths only.
# output:    6 rows per model, coverage and interval length interleaved per
#            method (cover, length, cover, length, cover, length).
# Columns of both tables follow the order of sample_sizes.
cf_output = np.zeros((len(modelYs)*3,len(sample_sizes)))
output = np.zeros((len(modelYs)*3*2,len(sample_sizes)))
for (q,k,cover,cf) in results:
    # Row block comes from the model's position in modelYs rather than from
    # the numeric value of its label, so non-consecutive labels also work.
    i = modelYs.index(q)
    j = sample_sizes.index(k)
    cf_output[i*3:i*3+3,j] = cf
    out = [cover[0], cf[0], cover[1], cf[1], cover[2], cf[2]]
    output[i*6:i*6+6,j] = out
print(output)
pd.DataFrame(output).to_csv("finite_pop_cover_rate_cf.csv")

[[0.894      0.936      0.948      0.9415     0.949     ]
 [1.90998046 1.29848517 0.85468807 0.47237049 0.34841223]
 [0.9425     0.9795     0.99       0.9935     0.994     ]
 [2.22653858 1.65988931 1.12011495 0.67076286 0.49253928]
 [0.928      0.937      0.944      0.94       0.95      ]
 [2.1806547  1.32819684 0.84854998 0.46729405 0.35137158]
 [0.9445     0.932      0.961      0.95       0.955     ]
 [1.64279772 1.23539943 0.950942   0.50093023 0.34734266]
 [0.9935     0.987      0.997      0.997      0.9975    ]
 [2.3633251  1.6693804  1.347768   0.76191486 0.53695825]
 [0.928      0.943      0.9505     0.9455     0.951     ]
 [1.64843893 1.31121676 0.9056516  0.49722379 0.34360742]
 [1.         0.995      0.9965     0.9875     0.9945    ]
 [2.6327793  1.72854596 1.12182745 0.66385    0.45228325]
 [1.         0.9995     1.         0.999      1.        ]
 [2.85566206 2.41606353 1.57099034 0.87885795 0.64004041]
 [1.         1.         1.         1.         1.        ]
 [3.65877927 2

In [3]:
from joblib import Parallel, delayed
import multiprocessing

# Simulation grid for the super-population experiment.
modelYs = ['1', '2', '3', '4', '5', '6']

# An earlier grid [50, 100, 200, 500, 1000] was assigned here and immediately
# overwritten; only the grid below was ever in effect.
# NOTE(review): presumably these sizes were chosen to divide evenly into the
# design's tuples -- confirm against dgp.py.
sample_sizes = [40, 80, 160, 480, 1000]

# One (model, sample size) job per grid point, models varying slowest.
qk_pairs = [(q, k) for q in modelYs for k in sample_sizes]
def processInput(qk):
    """Run the super-population simulation for one (model, sample size) pair.

    ``qk`` is a 2-tuple ``(model, size)``. Returns ``(model, size, cover, cf)``
    where ``cover`` and ``cf`` are the two values returned by
    ``cover_rate(size, model)``.

    NOTE(review): this reuses the name ``processInput`` from the
    finite-population cell, so whichever cell ran last decides which
    simulation a later call performs.
    """
    model, size = qk
    coverage, ci_length = cover_rate(size, model)
    return model, size, coverage, ci_length
# Run every grid point in parallel, one worker per CPU core.
num_cores = multiprocessing.cpu_count()
results = Parallel(n_jobs=num_cores)(delayed(processInput)(i) for i in qk_pairs)

# cf_output: 3 rows per model (one per inference method), interval lengths only.
# output:    6 rows per model, coverage and interval length interleaved per
#            method (cover, length, cover, length, cover, length).
# Columns of both tables follow the order of sample_sizes.
cf_output = np.zeros((len(modelYs)*3,len(sample_sizes)))
output = np.zeros((len(modelYs)*3*2,len(sample_sizes)))
for (q,k,cover,cf) in results:
    # Row block comes from the model's position in modelYs rather than from
    # the numeric value of its label, so non-consecutive labels also work.
    i = modelYs.index(q)
    j = sample_sizes.index(k)
    cf_output[i*3:i*3+3,j] = cf
    out = [cover[0], cf[0], cover[1], cf[1], cover[2], cf[2]]
    output[i*6:i*6+6,j] = out
print(output)
pd.DataFrame(output).to_csv("sup_pop_cover_rate_cf.csv")

[[0.9395     0.942      0.9495     0.949      0.95      ]
 [1.79995664 1.25066195 0.88306902 0.50508567 0.350895  ]
 [0.985      0.9925     0.9925     0.9935     0.994     ]
 [2.36526676 1.7121038  1.23071611 0.71326964 0.49485359]
 [0.934      0.9445     0.95       0.9505     0.95      ]
 [1.80693924 1.26169121 0.88477037 0.50619671 0.35101611]
 [0.931      0.941      0.954      0.9565     0.949     ]
 [1.88942156 1.29918163 0.89433633 0.50915378 0.35191122]
 [0.9855     0.989      0.996      0.9965     0.9935    ]
 [2.47362646 1.79678045 1.29010987 0.74979274 0.52253851]
 [0.926      0.936      0.947      0.9535     0.9475    ]
 [1.86727011 1.28244983 0.89109521 0.50719925 0.35162989]
 [0.948      0.9335     0.9455     0.9405     0.9495    ]
 [2.50503519 1.71579346 1.18794601 0.67729913 0.4696091 ]
 [0.983      0.9845     0.988      0.9925     0.9915    ]
 [3.08043487 2.23578224 1.58990003 0.92202029 0.6411362 ]
 [0.994      0.995      0.9945     0.998      0.9985    ]
 [3.75553569 2

In [4]:
# Spot check: rerun the finite-population simulation for model '1' at n = 1000.
cover_rate_finite(sample_size=1000, modelY='1')


[ 0.10373565 -0.64239603  0.49839301 -0.26575847 -0.4254577  -0.20111127
  0.00340742 -0.77378826  0.33024988  1.42987445]


(array([0.94  , 0.9935, 0.9425]), array([0.34852694, 0.49391664, 0.35096135]))