In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import time
import torch as th
import girth
from girth import rasch_conditional
from irt.data.rasch import generate_data, generate_data_positive_scores
from irt.algorithms.spectral_estimator import spectral_estimate
from irt.algorithms import spectral_estimator
from irt.algorithms import conditional_mle, joint_mle
from irt.algorithms import rasch_mml
from irt.algorithms import eigen_vector_method
from irt.evaluation import eval_utils
from irt.algorithms import pairwise_mle
from irt.algorithms import bayesian_1pl
# from girth_mcmc import GirthMCMC
from scipy.special import expit

# import data (you supply this function)
from scipy.stats import norm
# my_data = import_data(filename)

# # Assume it's dichotomous data with True -> 1 and False -> 0
# tagged_data = tag_missing_data(my_data, [0, 1])

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
def relative_betas_error(beta, betah):
    """Return the L2 distance between mean-centred true and estimated parameters.

    Both inputs are shifted to zero mean before they are compared, so a
    constant offset between ``beta`` and ``betah`` contributes nothing to the
    error. Despite the name, the result is NOT divided by the norm of
    ``beta``; it is an absolute distance.

    Inputs are flattened to 1-D first. Without this, comparing an ``(m, 1)``
    column vector with an ``(m,)`` vector passes a ``len()`` check but is
    broadcast into an ``(m, m)`` matrix, which yields a wrong non-zero norm.

    Parameters
    ----------
    beta : array_like
        Reference parameter values.
    betah : array_like
        Estimated parameter values; must hold as many elements as ``beta``.

    Returns
    -------
    float
        ``||(beta - mean(beta)) - (betah - mean(betah))||_2``.

    Raises
    ------
    AssertionError
        If the two inputs do not contain the same number of elements.
    """
    beta = np.asarray(beta, dtype=float).ravel()
    betah = np.asarray(betah, dtype=float).ravel()
    assert beta.size == betah.size, "beta and betah must have the same number of elements"
    beta_norm = beta - beta.mean()
    betah_norm = betah - betah.mean()
    return np.linalg.norm(beta_norm - betah_norm)

def relative_z_error(z, zh):
    """Return ``||z - zh|| / ||z||``, the error of ``zh`` relative to the norm of ``z``."""
    residual_norm = np.linalg.norm(z - zh)
    reference_norm = np.linalg.norm(z)
    return residual_norm / reference_norm

    

In [None]:
# ---------------------------------------------------------------------------
# Experiment: estimation error and wall-clock time of the spectral estimator
# (ASE) and the Bayesian 1PL estimator for an increasing number of students n,
# with the number of items m held fixed. Other estimators are kept below as
# commented-out blocks and are currently disabled.
# ---------------------------------------------------------------------------

# Seed the NumPy and PyTorch global RNGs so that a fresh "Restart & Run All"
# draws the same betas/thetas.
# NOTE(review): RNGs used inside generate_data / the estimators other than
# these two global ones are not covered by this seed - confirm.
SEED = 0
np.random.seed(SEED)
th.manual_seed(SEED)

# Value in `data` that marks a response to be skipped when building the
# Bayesian inputs.
# NOTE(review): presumably the missing-response sentinel written by
# generate_data when p < 1 - confirm against irt.data.rasch.
MISSING_RESPONSE = -99999

# Cross-n accumulators: one entry (a list of per-trial values) per n.
errors_arr = []
errors_ase_arr = []
errors_cmle_arr = []
errors_mmle_arr = []
errors_choppin_arr = []
errors_garner_arr = []
errors_saaty_arr = []
errors_pair_arr = []
errors_bayesian_arr = []

time_arr = []
time_ase_arr = []
time_cmle_arr = []
time_mmle_arr = []
time_choppin_arr = []
time_garner_arr = []
time_saaty_arr = []
time_pair_arr = []
time_bayesian_arr = []

n_array = [50, 100, 500, 1000, 2500, 5000]
n_trials = 10
m = 10
p = 1.0

test_var = 1
betas = np.random.normal(0, test_var, size=(m,))
student_var = 1

# Depends only on m, so it is computed once instead of once per trial.
lambd = 1./(m * np.log(m))

for n in n_array:
    # w_students = np.ones((n,))
    # p = np.sqrt(np.log(m)/n)
    thetas = np.random.normal(0, student_var, size=(n,))

    # Per-trial results for the current n.
    error_ase = []
    error_cmle = []
    error_mmle = []
    error_choppin = []
    error_garner = []
    error_saaty = []
    error_pair = []
    error_bayesian = []

    time_ase = []
    time_cmle = []
    time_mmle = []
    time_garner = []
    time_choppin = []
    time_saaty = []
    time_pair = []
    time_bayesian = []

    auc_mmle = []
    auc_ase = []

    for _ in range(n_trials):
        # Generate data
        data = generate_data(betas, thetas, p)

        # Accelerated spectral method
        start = time.perf_counter()
        est_ase = spectral_estimate(data, lambd=lambd)
        time_ase += [(time.perf_counter() - start)]
        error_ase += [relative_betas_error(betas, est_ase)]

        # Bayesian
        # Flatten the (item j, subject i) grid into parallel index/response
        # vectors, skipping masked entries. np.nonzero walks the mask in
        # row-major order, i.e. the same order as nested loops over j then i.
        response_grid = np.asarray(data)[:m, :n]
        observed = response_grid != MISSING_RESPONSE
        item_idx, subject_idx = np.nonzero(observed)
        models = th.tensor(subject_idx, dtype=th.long)
        items = th.tensor(item_idx, dtype=th.long)
        responses = th.tensor(response_grid[observed], dtype=th.float)

        # Record exactly one time and one error per trial. Defaults are NaN so
        # a failure at any stage cannot leave the two lists with different
        # lengths; a fit time measured before a later failure is kept.
        fit_seconds = np.nan
        fit_error = np.nan
        try:
            bayesian_estimator = bayesian_1pl.OneParamLog(priors="hierarchical", num_items=m, num_subjects=n)
            start = time.perf_counter()
            bayesian_estimator.fit(models, items, responses)
            fit_seconds = time.perf_counter() - start
            est_bayesian = bayesian_estimator.export()["diff"]
            fit_error = relative_betas_error(betas, est_bayesian)
        except Exception as e:
            # Best effort: keep the sweep running, but report the failure
            # instead of silently turning it into NaN.
            print(f"Bayesian estimator failed for n={n}: {e!r}")
        time_bayesian += [fit_seconds]
        error_bayesian += [fit_error]

        # CMLE

        # Choppin method
        # start = time.time()
        # # est_choppin = eigen_vector_method.choppin_method(data, return_beta=True)
        # # est_choppin = rasch_conditional(data)["Difficulty"]
        # est_choppin = conditional_mle.rasch_conditional_modified(data)
        # time_choppin += [(time.time() - start)]
        # error_choppin += [relative_betas_error(betas, est_choppin)]

        # Saaty's method
        # start = time.time()
        # est_saaty = eigen_vector_method.saaty_method(data, return_beta=True)
        # time_saaty += [(time.time() - start)]
        # error_saaty += [relative_betas_error(betas, est_saaty)]

        # Pairwise method
        # start = time.time()
        # est_pair = pairwise_mle.cmle_pairwise(data)
        # time_pair += [(time.time() - start)]
        # error_pair += [relative_betas_error(betas, est_pair)]

    # Keep the per-trial results of the active estimators for this n; the
    # per-n lists are re-created on the next iteration, so otherwise only the
    # printed means would survive.
    errors_ase_arr.append(error_ase)
    time_ase_arr.append(time_ase)
    errors_bayesian_arr.append(error_bayesian)
    time_bayesian_arr.append(time_bayesian)

    # Disabled together with their estimators above:
    # errors_cmle_arr.append(error_cmle)
    # errors_mmle_arr.append(error_mmle)
    # errors_choppin_arr.append(error_choppin)
    # errors_garner_arr.append(error_garner)
    # errors_saaty_arr.append(error_saaty)

    # time_cmle_arr.append(time_cmle)
    # time_mmle_arr.append(time_mmle)
    # time_choppin_arr.append(time_choppin)
    # time_garner_arr.append(time_garner)
    # time_saaty_arr.append(time_saaty)

    # Mean error (mean seconds) per estimator; nanmean ignores failed trials.
    print(f"n={n}, p={p}, m={m}, ASE={np.nanmean(error_ase)} ({np.nanmean(time_ase)})," +
          # f" CMLE={np.nanmean(error_cmle)} ({np.nanmean(time_cmle)}), MMLE={np.nanmean(error_mmle)} ({np.nanmean(time_mmle)}), " +
          # f"Choppin={np.nanmean(error_choppin)} ({np.nanmean(time_choppin)}), Pair={np.nanmean(error_pair)} ({np.nanmean(time_pair)}), Saaty={np.nanmean(error_saaty)} ({np.nanmean(time_saaty)}), " +
         f"Bayesian={np.nanmean(error_bayesian)} ({np.nanmean(time_bayesian)}) ")

n=50, p=1.0, m=10, ASE=1.3843625417092125 (0.007961726188659668),Bayesian=2.3967332085964403 (161.05302166938782) 
n=100, p=1.0, m=10, ASE=0.8922798394204106 (0.010030055046081543),Bayesian=2.2014307446687864 (159.08718929971968) 
