# First tests of inference methods on the epidemiology problem

In [1]:
import numpy as np
%matplotlib inline
from matplotlib import pyplot as plt

## Data

In [2]:
# Training sample sizes: 100, 1000 and 10000.
sample_sizes = [10 ** exponent for exponent in range(2, 5)]

# Each pair is (name used in result paths, label shown in plot legends);
# transposing the pairs yields the two parallel lists used below.
methods, method_labels = (
    list(column) for column in zip(('maf', 'MAF'), ('scandal', 'SCANDAL'))
)

In [None]:
# Load the stored evaluation results for every (method, training sample size)
# combination and gather three metrics, each indexed as [method][sample size]:
#   expected_log_likelihoods - sum of the estimated log likelihoods divided by
#                              the length of their first axis
#   log_likelihood_mses      - mean squared difference to the reference
#                              log likelihoods loaded below
#   roc_aucs                 - value stored in the ROC AUC result file
results_dir = '../data/results/epidemiology/'

expected_log_likelihoods = []
log_likelihood_mses = []
roc_aucs = []

# Reference log likelihoods: the MAF result file without a sample-size suffix.
log_likelihood_reference = np.load(results_dir + 'maf/epidemiology_maf_log_p_hat.npy')

for method in methods:
    log_likelihood_mses_this_method = []
    roc_aucs_this_method = []
    expected_log_likelihoods_this_method = []

    for sample_size in sample_sizes:
        # NOTE(review): assumes result files are named
        # 'epidemiology_<method>_samplesize_<n>_...', mirroring the reference
        # file above; a hard-coded 'maf' would make the 'scandal' directory
        # lookups point at MAF-named files. Confirm against the files on disk.
        filename_stem = (results_dir + method + '/epidemiology_' + method
                         + '_samplesize_' + str(sample_size))

        log_likelihood = np.load(filename_stem + '_log_p_hat.npy')
        # np.save always appends '.npy' to string paths, while np.load does not,
        # so the extension has to be spelled out here.
        roc_auc = np.load(filename_stem + '_roc_auc_surrogate_vs_simulator.npy')

        # Plain numpy MSE; no additional import needed.
        mse = np.mean((log_likelihood_reference - log_likelihood) ** 2)
        expected_log_likelihood = np.sum(log_likelihood) / log_likelihood.shape[0]

        expected_log_likelihoods_this_method.append(expected_log_likelihood)
        roc_aucs_this_method.append(roc_auc)
        log_likelihood_mses_this_method.append(mse)

    # Append the per-method rows to the overall result lists (the MSE row must
    # go into log_likelihood_mses, not into itself).
    log_likelihood_mses.append(log_likelihood_mses_this_method)
    roc_aucs.append(roc_aucs_this_method)
    expected_log_likelihoods.append(expected_log_likelihoods_this_method)

# Convert to arrays so the plotting code can index them by method.
expected_log_likelihoods = np.array(expected_log_likelihoods)
log_likelihood_mses = np.array(log_likelihood_mses)
roc_aucs = np.array(roc_aucs)


## Plot sample size vs performance

In [None]:
# One row of panels, one panel per metric: each panel plots the metric against
# the training sample size, with one curve per method.
panels = [
    (expected_log_likelihoods, 'Exp. log likelihood'),
    (log_likelihood_mses, 'MSE (log likelihood) wrt high-statistics model'),
    (roc_aucs, 'ROC AUC between simulator and surrogate samples'),
]

fig = plt.figure(figsize=(12,4))

for panel_index, (metric, ylabel) in enumerate(panels, start=1):
    ax = plt.subplot(1, len(panels), panel_index)

    for m, method in enumerate(method_labels):
        # 'o' (circle) is the valid matplotlib marker; the digit '0' is rejected.
        plt.plot(sample_sizes, metric[m],
                 lw=1.5, ls='-', marker='o', ms=10.,
                 label=method)

    # Legend and axis labels only need to be set once per panel.
    plt.legend()

    plt.xlabel('Training sample size')
    plt.ylabel(ylabel)

plt.tight_layout()
plt.show()
