## Synthetic Experiments

In this notebook, we generate synthetic data sets from a ground-truth item response theory model and evaluate several confidence bound methods on them. For each method, we measure (1) how often the returned bounds actually include the 'true' ability parameter (coverage) and (2) how precisely the width of the bound matches the actual distance between the true parameter and the estimated parameter (log-bias).

In [1]:
# set up data generation function
def sample_data(m, n):
    """Sample a synthetic response matrix from a logistic response model.

    Parameters
    ----------
    m : int
        Number of entries drawn for ``theta`` (rows of the result).
    n : int
        Number of entries drawn for ``b`` (columns of the result).

    Returns
    -------
    theta : ndarray of shape (m,)
        Standard-normal draws.
    b : ndarray of shape (n,)
        Standard-normal draws.
    P : ndarray of shape (m, n)
        ``P[i, j] = 1 / (1 + exp(-(theta[i] - b[j])))``.
    X : ndarray of shape (m, n)
        Float matrix of zeros and ones; entry (i, j) is 1 exactly when a
        uniform draw on [0, 1) is at least ``1 - P[i, j]``.
    """
    theta = np.random.randn(m)
    b = np.random.randn(n)
    # broadcast theta along rows and b along columns
    logits = theta[:, None] - b[None, :]
    P = 1. / (1. + np.exp(-logits))
    # threshold one uniform draw per cell against the failure probability
    uniform = np.random.rand(m, n)
    X = np.where(uniform >= 1. - P, 1., 0.)
    return theta, b, P, X
# set up a function to evaluate coverage
def eval_coverage(theta, theta_min, theta_max):
    """Return the fraction of entries of ``theta`` inside their bounds.

    An entry counts as covered when ``theta_min <= theta <= theta_max``
    holds element-wise (both ends inclusive).
    """
    above_lower = theta >= theta_min
    below_upper = theta <= theta_max
    covered = np.logical_and(above_lower, below_upper)
    return np.mean(covered)
# set up a function to compare the bound size with the size needed to cover the true theta
def eval_logbias(theta, theta_est, theta_min, theta_max):
    """Mean log-ratio between the bound width and the width needed to reach theta.

    For every entry, the one-sided width of the bound on the side where the
    true value lies is divided by the distance between the estimate and the
    true value:

    * ``theta <  theta_est``: ``(theta_est - theta_min) / (theta_est - theta)``
    * ``theta >= theta_est``: ``(theta_max - theta_est) / (theta - theta_est)``

    The function returns the mean of the natural log of these ratios, so a
    value of 0 means the bounds are, on (geometric) average, exactly as wide
    as needed; positive values mean wider, negative values mean narrower.

    Parameters
    ----------
    theta, theta_est, theta_min, theta_max : array_like
        True values, point estimates, lower bounds and upper bounds. All four
        must have the same shape.

    Returns
    -------
    float
        Mean of ``log(ratio)`` over all entries. A bound of zero width on the
        relevant side gives a ratio of 0 and therefore ``-inf``; this is
        deliberately not masked, so degenerate bounds stay visible in the
        result (numpy emits a RuntimeWarning in that case).
    """
    # Coerce to float arrays: the ratio buffer below must be floating point
    # even if the caller passes integer data, otherwise ratios are truncated.
    theta     = np.asarray(theta, dtype = float)
    theta_est = np.asarray(theta_est, dtype = float)
    theta_min = np.asarray(theta_min, dtype = float)
    theta_max = np.asarray(theta_max, dtype = float)

    ratios = np.zeros(theta.shape, dtype = float)
    # true value lies below the estimate: compare against the lower bound
    small = theta < theta_est
    lo    = theta_est[small] - theta_min[small]
    ratios[small] = lo / (theta_est[small] - theta[small])
    # true value lies at or above the estimate: compare against the upper bound
    large = theta >= theta_est
    hi    = theta_max[large] - theta_est[large]
    ratios[large] = hi / (theta[large] - theta_est[large])
    return np.mean(np.log(ratios))

In [2]:
# set up experimental hyper-parameters
from scipy.stats import chi2

# every (m, n) pair passed to sample_data: all combinations of the listed
# m and n values, with n varying fastest
experimental_conditions = [
    (m, n)
    for m in (30, 50, 100, 500)
    for n in (10, 20)
]
# number of repetitions (freshly sampled data sets) per condition
R     = 10

# regularization strength passed to every bounds model
regul = 1.
# confidence level
alpha = .95
# half of the chi-square quantile (1 degree of freedom) at level alpha;
# passed as absolute_bound to the barrier and AO models
absolute_bound = .5 * chi2.ppf(alpha, df = 1)
mu    = .01

# names of the compared methods; 'AO(k)' encodes the iteration count k
method_labels = ['wald', 'likelihood-profile', 'barrier'] + ['AO(%d)' % k for k in (1, 2, 3)]

In [6]:
# perform experiment in varying conditions
import numpy as np
from tqdm import tqdm
import time
import ability_bounds

def _make_model(label):
    """Build a fresh, unfitted bounds model for the given method label.

    'AO(k)' labels create an AOBounds model with ``num_iterations = k``,
    where k is the integer between the parentheses (any number of digits).
    Raises ValueError for labels that match none of the known methods.
    """
    if label == 'wald':
        return ability_bounds.WaldBounds(regul, alpha)
    if label == 'likelihood-profile':
        return ability_bounds.LikelihoodProfile(regul, alpha)
    if label == 'barrier':
        return ability_bounds.BarrierBounds(regul, absolute_bound = absolute_bound)
    if label.startswith('AO'):
        # parse everything between '(' and ')' so multi-digit counts work too
        num_iterations = int(label[label.index('(') + 1:label.index(')')])
        return ability_bounds.AOBounds(regul, absolute_bound = absolute_bound, num_iterations = num_iterations)
    raise ValueError('unknown method: %s' % label)


# NOTE(review): no random seed is set in the visible cells, so every run of
# this loop samples different data sets and the printed numbers will vary.
for i, (m, n) in enumerate(experimental_conditions):
    print('--- condition %d; m = %d, n = %d ---' % (i+1, m, n))

    # one row per method, one column per repetition
    coverage = np.zeros((len(method_labels), R))
    logbias  = np.zeros((len(method_labels), R))
    runtimes = np.zeros((len(method_labels), R))

    for r in tqdm(range(R)):
        # sample new data set; all methods are evaluated on the same data
        theta, b, P, X = sample_data(m, n)
        # iterate over all methods
        for method, label in enumerate(method_labels):
            # set up a fresh model
            model = _make_model(label)
            # fit the model to the data; perf_counter is a monotonic,
            # high-resolution clock, so the measured duration cannot be
            # distorted by system clock adjustments
            start = time.perf_counter()
            model.fit(X)
            runtimes[method, r] = time.perf_counter() - start
            # evaluate the model via the attributes it exposes after fit
            coverage[method, r] = eval_coverage(theta, model.theta_min_, model.theta_max_)
            logbias[method, r]  = eval_logbias(theta, model.theta_, model.theta_min_, model.theta_max_)

    # print current results as mean +- standard deviation over repetitions
    print('method              \tcoverage\tlogbias\t\truntime')
    for method, label in enumerate(method_labels):
        # pad the label to 20 characters so the columns line up
        row = label.ljust(20)
        for measure in [coverage, logbias, runtimes]:
            row += '\t%.3f +- %.3f' % (np.mean(measure[method, :]), np.std(measure[method, :]))
        print(row)
    # store current results: one row per repetition, columns grouped by
    # measure (coverage, logbias, runtime) and ordered by method within each
    filename = 'results_%d_%d.csv' % (m, n)
    datamat  = np.concatenate((coverage.T, logbias.T, runtimes.T), 1)
    header   = [
        '%s_%s' % (measure, method_label)
        for measure in ['coverage', 'logbias', 'runtime']
        for method_label in method_labels
    ]
    np.savetxt(filename, datamat, delimiter = '\t', fmt = '%g', header = '\t'.join(header), comments = '')

--- condition 1; m = 30, n = 10 ---


100%|██████████| 10/10 [00:10<00:00,  1.02s/it]


method              	coverage	logbias		runtime
wald                	1.000 +- 0.000	1.798 +- 0.159	0.008 +- 0.002
likelihood-profile  	0.970 +- 0.018	1.352 +- 0.161	0.480 +- 0.013
barrier             	0.917 +- 0.027	1.191 +- 0.153	0.066 +- 0.004
AO(1)               	0.957 +- 0.021	1.331 +- 0.162	0.034 +- 0.001
AO(2)               	0.970 +- 0.018	1.352 +- 0.161	0.166 +- 0.005
AO(3)               	0.970 +- 0.018	1.352 +- 0.161	0.262 +- 0.009
--- condition 2; m = 30, n = 20 ---


  return np.mean(np.log(ratios))
100%|██████████| 10/10 [00:10<00:00,  1.06s/it]


method              	coverage	logbias		runtime
wald                	0.997 +- 0.010	1.888 +- 0.169	0.009 +- 0.001
likelihood-profile  	0.930 +- 0.064	1.244 +- 0.166	0.492 +- 0.023
barrier             	0.093 +- 0.049	-inf +- nan	0.049 +- 0.003
AO(1)               	0.927 +- 0.068	1.212 +- 0.165	0.035 +- 0.001
AO(2)               	0.930 +- 0.064	1.244 +- 0.166	0.179 +- 0.006
AO(3)               	0.930 +- 0.064	1.244 +- 0.166	0.289 +- 0.010
--- condition 3; m = 50, n = 10 ---


100%|██████████| 10/10 [00:18<00:00,  1.80s/it]


method              	coverage	logbias		runtime
wald                	0.998 +- 0.006	1.834 +- 0.154	0.010 +- 0.002
likelihood-profile  	0.962 +- 0.030	1.361 +- 0.152	0.884 +- 0.045
barrier             	0.920 +- 0.028	1.204 +- 0.155	0.110 +- 0.006
AO(1)               	0.956 +- 0.025	1.347 +- 0.152	0.054 +- 0.002
AO(2)               	0.962 +- 0.030	1.361 +- 0.152	0.295 +- 0.012
AO(3)               	0.962 +- 0.030	1.361 +- 0.152	0.448 +- 0.018
--- condition 4; m = 50, n = 20 ---


100%|██████████| 10/10 [00:18<00:00,  1.89s/it]


method              	coverage	logbias		runtime
wald                	1.000 +- 0.000	1.936 +- 0.143	0.017 +- 0.002
likelihood-profile  	0.934 +- 0.024	1.256 +- 0.146	0.902 +- 0.030
barrier             	0.080 +- 0.052	-inf +- nan	0.093 +- 0.006
AO(1)               	0.928 +- 0.027	1.231 +- 0.146	0.062 +- 0.003
AO(2)               	0.934 +- 0.024	1.256 +- 0.146	0.319 +- 0.007
AO(3)               	0.934 +- 0.024	1.256 +- 0.146	0.499 +- 0.004
--- condition 5; m = 100, n = 10 ---


100%|██████████| 10/10 [00:44<00:00,  4.42s/it]


method              	coverage	logbias		runtime
wald                	0.995 +- 0.007	1.806 +- 0.132	0.031 +- 0.002
likelihood-profile  	0.959 +- 0.018	1.311 +- 0.137	2.266 +- 0.043
barrier             	0.916 +- 0.028	1.157 +- 0.132	0.284 +- 0.011
AO(1)               	0.955 +- 0.020	1.303 +- 0.138	0.141 +- 0.004
AO(2)               	0.959 +- 0.018	1.311 +- 0.137	0.739 +- 0.007
AO(3)               	0.959 +- 0.018	1.311 +- 0.137	0.957 +- 0.008
--- condition 6; m = 100, n = 20 ---


100%|██████████| 10/10 [00:50<00:00,  5.00s/it]


method              	coverage	logbias		runtime
wald                	0.998 +- 0.004	2.053 +- 0.096	0.048 +- 0.003
likelihood-profile  	0.951 +- 0.017	1.339 +- 0.095	2.555 +- 0.113
barrier             	0.080 +- 0.025	-inf +- nan	0.242 +- 0.008
AO(1)               	0.949 +- 0.016	1.324 +- 0.095	0.156 +- 0.003
AO(2)               	0.951 +- 0.017	1.339 +- 0.095	0.839 +- 0.017
AO(3)               	0.951 +- 0.017	1.339 +- 0.095	1.162 +- 0.039
--- condition 7; m = 500, n = 10 ---


100%|██████████| 10/10 [06:19<00:00, 37.95s/it]


method              	coverage	logbias		runtime
wald                	0.998 +- 0.002	1.829 +- 0.045	0.398 +- 0.025
likelihood-profile  	0.950 +- 0.009	1.308 +- 0.041	20.217 +- 0.742
barrier             	0.907 +- 0.010	1.161 +- 0.045	2.646 +- 0.090
AO(1)               	0.949 +- 0.008	1.306 +- 0.041	0.855 +- 0.036
AO(2)               	0.950 +- 0.009	1.308 +- 0.041	5.912 +- 0.186
AO(3)               	0.950 +- 0.009	1.308 +- 0.041	7.919 +- 0.261
--- condition 8; m = 500, n = 20 ---


  return np.mean(np.log(ratios))
  return np.mean(np.log(ratios))
100%|██████████| 10/10 [10:26<00:00, 62.68s/it]

method              	coverage	logbias		runtime
wald                	1.000 +- 0.000	2.064 +- 0.057	0.756 +- 0.070
likelihood-profile  	0.950 +- 0.009	1.306 +- 0.055	35.319 +- 0.817
barrier             	0.080 +- 0.021	-inf +- nan	2.704 +- 0.092
AO(1)               	0.949 +- 0.009	1.303 +- 0.055	1.184 +- 0.063
AO(2)               	0.950 +- 0.009	1.306 +- 0.055	10.124 +- 0.198
AO(3)               	0.950 +- 0.009	1.306 +- 0.055	12.592 +- 0.167



  x = asanyarray(arr - arrmean)
