# Particle physics results

## Setup

In [21]:
import sys
import numpy as np
from sklearn.metrics import roc_auc_score
from sklearn.neighbors import KernelDensity
from itertools import product


## Load results

In [22]:
# Number of runs loaded per algorithm (file suffixes "", "_run1", "_run2", ...).
n_runs = 6
# Number of MCMC chains loaded per run and true-parameter point (suffixes "", "_chain1", ...).
n_chains = 4
# Number of true-parameter points loaded per run (suffixes "", "_trueparam1", ...).
n_trueparams = 3
# If True, runs containing any non-finite value are replaced by NaN in all cleaned quantities.
remove_all_results_with_nans = False

In [23]:
# Parallel registries: entry i of each list describes the i-th registered algorithm.
algo_filenames = []    # prefix of the result file names
algo_additionals = []  # suffix placed after the dimension tag in the file names
algo_labels = []       # label shown in the printed tables
# Row indices in front of which the printed tables get a divider line.
algo_dividers = []
algo_dims = []         # dimension tag of the result files ("" if none)

def add_algo(filename, add, label, dim=""):
    """Register one algorithm in the module-level registry lists.

    Arguments:
        filename: stored in `algo_filenames` (prefix of the result file names).
        add: stored in `algo_additionals` (suffix after the dimension tag).
        label: stored in `algo_labels` (row label in the printed tables).
        dim: stored in `algo_dims` (dimension tag, empty string by default).
    """
    registrations = (
        (algo_filenames, filename),
        (algo_additionals, add),
        (algo_labels, label),
        (algo_dims, dim),
    )
    for registry, entry in registrations:
        registry.append(entry)
    
    
def add_divider():
    """Record the current number of registered algorithms as a divider position."""
    divider_position = len(algo_filenames)
    algo_dividers.append(divider_position)
    

# Register the algorithms to compare: (file prefix, file suffix, table label, dimension tag).
add_algo("flow", "_may", "2D summaries", "2d")
add_algo("alices", "_may", "ALICES")
add_algo("flow", "_may", "AF", "40d")
add_algo("pie", "_may", "PIE", "40d")
add_algo("mf", "_sequential_may", "MFMF-M/D", "40d")
#add_algo("emf", "_sequential_may", "MFMFE-M/D", "40d")

# Number of registered algorithms; first axis of the result arrays computed below.
n_algos = len(algo_filenames)


In [24]:
def load(name, shape, numpyfy=True, chains=1, result_dir="../data/results"):
    """Load one result quantity for every registered algorithm and run.

    For each algorithm in the registry lists and each of the `n_runs` runs, the file
    ``{result_dir}/{prefix}_{2|14}_lhc{dim}{suffix}{run}_{name}.npy`` is read.

    Arguments:
        name: quantity name, the last part of the file name before ".npy".
        shape: shape every loaded array is reshaped to, or None to keep arrays as stored.
        numpyfy: if True, the nested list is converted to one float array, and arrays
            whose size does not match `shape` are replaced by NaN placeholders.
        chains: unused; kept so existing calls stay valid.
        result_dir: directory containing the result files.

    Returns:
        A float array of shape (n_algos, n_runs) + shape if `numpyfy` is True,
        otherwise a nested list indexed as [algo][run]. Missing files are reported
        on stdout and yield None (if `shape` is None) or a NaN array of shape `shape`.
    """
    all_results = []

    for algo_filename, algo_add, algo_dim in zip(algo_filenames, algo_additionals, algo_dims):
        # Files of "2d" algorithms carry the tag "2", all others the tag "14".
        dim_tag = "2" if algo_dim == "2d" else "14"
        algo_results = []

        for run in range(n_runs):
            run_str = "" if run == 0 else "_run{}".format(run)
            filename = "{}/{}_{}_lhc{}{}{}_{}.npy".format(
                result_dir, algo_filename, dim_tag, algo_dim, algo_add, run_str, name
            )

            try:
                this_result = np.load(filename)
            except FileNotFoundError as e:
                # Missing runs are expected; report them and keep a placeholder so that
                # the [algo][run] layout stays rectangular.
                print(e)
                algo_results.append(None if shape is None else np.nan * np.ones(shape))
                continue

            if shape is None:
                # Nothing to reshape to: keep the array exactly as stored.
                algo_results.append(this_result)
            elif (not numpyfy) or this_result.size == np.prod(shape):
                algo_results.append(this_result.reshape(shape))
            else:
                # Wrong number of entries: treat the run as missing.
                algo_results.append(np.nan * np.ones(shape))

        all_results.append(algo_results)

    if numpyfy:
        # The np.float alias was removed from NumPy; the builtin float is the same dtype.
        all_results = np.array(all_results, dtype=float)

    return all_results


# Generated samples, kept as a nested list [algo][run] (None where the file is missing).
model_gen_x = load("samples", None, numpyfy=False)
# Manifold distance of the generated samples: float array of shape (n_algos, n_runs, 10000).
model_gen_closure = load("samples_manifold_distance", (10000,))


[Errno 2] No such file or directory: '../data/results/flow_2_lhc2d_may_run5_samples.npy'
[Errno 2] No such file or directory: '../data/results/alices_14_lhc_may_samples.npy'
[Errno 2] No such file or directory: '../data/results/alices_14_lhc_may_run1_samples.npy'
[Errno 2] No such file or directory: '../data/results/alices_14_lhc_may_run2_samples.npy'
[Errno 2] No such file or directory: '../data/results/alices_14_lhc_may_run3_samples.npy'
[Errno 2] No such file or directory: '../data/results/alices_14_lhc_may_run4_samples.npy'
[Errno 2] No such file or directory: '../data/results/alices_14_lhc_may_run5_samples.npy'
[Errno 2] No such file or directory: '../data/results/flow_14_lhc40d_may_run5_samples.npy'
[Errno 2] No such file or directory: '../data/results/pie_14_lhc40d_may_run5_samples.npy'
[Errno 2] No such file or directory: '../data/results/mf_14_lhc40d_sequential_may_run3_samples.npy'
[Errno 2] No such file or directory: '../data/results/flow_2_lhc2d_may_samples_manifold_distanc

In [26]:
def load_mcmc(name, shape, numpyfy=True, result_dir="../data/results"):
    """Load one MCMC quantity for every algorithm, run, true-parameter point and chain.

    For each combination the file
    ``{result_dir}/{prefix}_{2|14}_lhc{dim}{suffix}{run}_{name}{trueparam}{chain}.npy``
    is read, where the run, trueparam and chain parts are empty for index 0.

    Arguments:
        name: quantity name used in the file name.
        shape: shape of a single chain's array; must be a sequence, because it is
            appended to the leading axes of the returned array.
        numpyfy: if True, arrays whose size does not match `shape` are replaced by NaN
            placeholders instead of being reshaped. The result is converted to an
            array regardless of this flag.
        result_dir: directory containing the result files.

    Returns:
        Float array of shape (n_algos, n_runs, n_trueparams, n_chains) + shape.
        Missing files are silently replaced by NaN arrays.
    """
    all_results = []

    for algo_filename, algo_add, algo_dim in zip(algo_filenames, algo_additionals, algo_dims):
        # Files of "2d" algorithms carry the tag "2", all others the tag "14".
        dim_tag = "2" if algo_dim == "2d" else "14"
        algo_results = []

        for run in range(n_runs):
            run_str = "" if run == 0 else "_run{}".format(run)

            for trueparam in range(n_trueparams):
                trueparam_str = "" if trueparam == 0 else "_trueparam{}".format(trueparam)

                for chain in range(n_chains):
                    chain_str = "" if chain == 0 else "_chain{}".format(chain)
                    filename = "{}/{}_{}_lhc{}{}{}_{}{}{}.npy".format(
                        result_dir, algo_filename, dim_tag,
                        algo_dim, algo_add, run_str, name, trueparam_str, chain_str
                    )

                    try:
                        this_result = np.load(filename)
                    except FileNotFoundError:
                        # Deliberately quiet: many chain files are expected to be missing.
                        algo_results.append(None if shape is None else np.nan * np.ones(shape))
                        continue

                    if shape is None:
                        algo_results.append(this_result)
                    elif (not numpyfy) or this_result.size == np.prod(shape):
                        algo_results.append(this_result.reshape(shape))
                    else:
                        # Wrong number of entries: treat the chain as missing.
                        algo_results.append(np.nan * np.ones(shape))

        all_results.append(algo_results)

    # The np.float alias was removed from NumPy; the builtin float is the same dtype.
    all_results = np.array(all_results, dtype=float)
    # Unflatten the (run, trueparam, chain) loop order into separate axes.
    all_results = all_results.reshape([all_results.shape[0], n_runs, n_trueparams, n_chains] + list(shape))

    return all_results


# Posterior samples from the MCMC chains; each chain file is reshaped to (750, 2).
model_posterior_samples = load_mcmc("posterior_samples", (750, 2,))
model_posterior_samples.shape  # (algo, run, true param id, chain, sample, theta component)


(5, 6, 3, 4, 750, 2)

In [27]:
def remove_nans(*results):
    """Return the inputs unchanged if all are finite, else NaN arrays of the same shapes.

    If every entry of every argument is finite, the argument tuple itself is returned.
    As soon as one argument holds a NaN or infinity, all arguments are replaced by
    NaN-filled arrays (returned as a list).
    """
    # Evaluate every argument before deciding, without short-circuiting
    finite_flags = [bool(np.all(np.isfinite(entry))) for entry in results]

    if False in finite_flags:
        return [np.nan * np.ones_like(entry) for entry in results]

    return results


def remove_nans_from_lists(*raws):
    """Apply `remove_nans` run by run across several quantities.

    Each argument is indexed as raw[algo][run]. For every (algo, run) pair the
    corresponding entries of all quantities are passed to `remove_nans` together,
    so a non-finite value in one quantity blanks that run in all of them.

    Returns a list with one array per quantity, each indexed as [algo][run].
    """
    algo_count = len(raws[0])

    # All quantities must cover the same algorithms
    for raw in raws:
        assert len(raw) == algo_count

    # cleans[quantity][algo] collects the cleaned runs
    cleans = [[[] for _ in range(algo_count)] for _ in raws]

    for algo_idx in range(algo_count):
        for run_idx in range(n_runs):
            per_quantity = [raw[algo_idx][run_idx] for raw in raws]
            cleaned = remove_nans(*per_quantity)
            for quantity_runs, cleaned_entry in zip(cleans, cleaned):
                quantity_runs[algo_idx].append(cleaned_entry)

    return [np.array(quantity_runs) for quantity_runs in cleans]
     
    
# Optionally blank every run that has a non-finite value in either of the two quantities.
if remove_all_results_with_nans:
    model_gen_closure, model_posterior_samples = remove_nans_from_lists(model_gen_closure, model_posterior_samples)


## Calculate metrics

In [28]:
# NOTE(review): min_logp is not referenced in the visible part of this notebook;
# presumably a lower clip for log posteriors -- confirm or remove.
min_logp = -100.
# Average the closure values over the last axis: one value per (algo, run).
model_gen_mean_closure = np.mean(model_gen_closure, axis=2)


In [29]:
# Gaussian KDE bandwidth and the true parameter point belonging to each true-param index
bandwidth = 0.1
true_param_points = np.array([[0.,0.], [0.5, 0.], [-1., -1.]])

model_true_log_posteriors = []

for algo, run, trueparam in product(range(n_algos), range(n_runs), range(n_trueparams)):
    # Pool the samples of all chains into one (n_samples, 2) array
    mcmcs = model_posterior_samples[algo, run, trueparam].reshape((-1, 2))
    # Keep only samples in which both theta components are finite
    mcmcs = mcmcs[np.isfinite(mcmcs).all(axis=-1)]

    if mcmcs.shape[0] > 0:
        # Log density of the KDE fitted to the samples, evaluated at the true point
        kde = KernelDensity(kernel="gaussian", bandwidth=bandwidth)
        kde.fit(mcmcs)
        truth = true_param_points[trueparam].reshape((1, 2))
        model_true_log_posteriors.append(kde.score(truth))
    else:
        # No usable samples for this combination
        model_true_log_posteriors.append(np.nan)

model_true_log_posteriors = np.array(model_true_log_posteriors).reshape((n_algos, n_runs, n_trueparams)) 


## Best metrics

In [30]:
# Axes: (algo, run, true param id)
model_true_log_posteriors.shape

(5, 6, 3)

In [31]:
best_closure, best_posterior = -1, -1

# Best closure: smallest median over runs of the per-run mean closure
median_closure_per_algo = np.nanmedian(model_gen_mean_closure, axis=1)
best_closure = np.nanargmin(median_closure_per_algo)
print(algo_labels[best_closure])

# Best posterior: largest log posterior, averaged over true points first, then over runs
mean_log_posterior_per_run = np.nanmean(model_true_log_posteriors, axis=-1)
mean_log_posterior_per_algo = np.nanmean(mean_log_posterior_per_run, axis=-1)
best_posterior = np.nanargmax(mean_log_posterior_per_algo)
print(algo_labels[best_posterior])


AF
ALICES


  r = func(a, **kwargs)
  


## Print metrics

In [35]:
def print_results(
    include_err=True, include_n_runs=False, include_runs=False,
    l_label=None, l_mean=5, l_err=2,
    median=True, divide_std_by_sqrtn=False, latex=False, after_decs=(3,2)
):
    """Print a table with the closure and log-posterior metric of every algorithm.

    One row is printed per entry of `algo_labels`, using `model_gen_mean_closure`
    and `model_true_log_posteriors`. The rows with index `best_closure` and
    `best_posterior` are highlighted in the respective column.

    Arguments:
        include_err: append the standard deviation in parentheses, scaled by
            10**(number of decimals) and truncated to an integer.
        include_n_runs: append the number of finite entries in square brackets.
        include_runs: append the per-run means in square brackets.
        l_label: width of the label column; None (default) uses the longest label
            registered at call time.
        l_mean: minimum width of each printed number.
        l_err: minimum number of digits of the error, zero-padded.
        median: summarize with the median if True, with the mean otherwise.
        divide_std_by_sqrtn: divide the standard deviation by the square root of the
            number of finite entries.
        latex: print LaTeX table rows instead of a plain-text table.
        after_decs: decimals for the (closure, log posterior) columns.
    """
    # Resolve the label width now rather than at definition time, so that algorithms
    # registered after this function was defined are not truncated.
    if l_label is None:
        l_label = max((len(label) for label in algo_labels), default=0)

    # How to format the numbers
    l_result = (
        l_mean + int(include_err) * (2 + l_err)
        + int(include_n_runs) * 4
        + int(include_runs) * (3 + n_runs*l_mean + (n_runs - 1)*2)
    )
    l_total = l_label + 1 + 2 * (3 + l_result)

    # Divider
    empty_result = "" if latex else " "*(l_result + 1)
    col_divider = "&" if latex else "|"
    line_end = r"\\" if latex else ""
    block_divider = r"\midrule" if latex else "-"*l_total

    def _f(val, after_dec, best=False):
        """Format one table cell from all values of one algorithm."""
        if not np.any(np.isfinite(val)):
            return empty_result

        def _number(value):
            return "{:>{}.{}f}".format(value, l_mean, after_dec)

        result = _number(np.nanmedian(val) if median else np.nanmean(val))

        if latex and best:
            result = r"\textbf{" + result + "}"

        if include_err:
            # Exponent is 0.5 or 0, i.e. divide by sqrt(n) or by 1
            n_finite = np.sum(np.isfinite(val))
            scale = n_finite**(0.5*float(divide_std_by_sqrtn))
            err_str = "({:0>{}d})".format(int(10**after_dec * np.nanstd(val) / scale), l_err)
            if latex:
                result += r"\,\textcolor{dark-gray}{" + err_str + "}"
            else:
                result += err_str

        if include_n_runs:
            result += " [{:1n}]".format(np.sum(np.isfinite(val)))
        if include_runs:
            # One entry per run; joining avoids printing a single run twice
            result += " [" + ", ".join(_number(np.nanmean(run_val)) for run_val in val) + "]"
            # Runs without finite values show up as blanks instead of "nan"
            result = result.replace("nan", "   ")

        # Plain-text marker column: "*" for the best algorithm, a space otherwise
        if (not latex) and best:
            result += "*"
        elif (not latex) and (not best):
            result += " "

        return result

    # Header
    print(
        "{2:<{0}.{0}s} {5} {3:>{1}.{1}s} {5} {4:>{1}.{1}s} {6}".format(
            l_label, l_result, "", "Closure", "log p", col_divider, line_end
        )
    )
    print(block_divider)

    # Iterate over methods
    for i, (label, closure, posterior) in enumerate(zip(
        algo_labels, model_gen_mean_closure, model_true_log_posteriors
    )):
        # Divider
        if i in algo_dividers:
            print(block_divider)

        # Print results
        print("{1:<{0}.{0}s} {4} {2}{4} {3} {5}".format(
            l_label, label,
            _f(closure, after_decs[0], i==best_closure), _f(posterior[:,:], after_decs[1], i==best_posterior),
            col_divider, line_end
        ))


In [36]:
# Table including the per-run means in square brackets.
print_results(include_runs=True)

             |                                              Closure |                                                log p 
---------------------------------------------------------------------------------------------------------------------------
2D summaries |                                                      |  0.81(185) [ 0.35,  0.68,  0.66,  0.13,  0.66,      ]  
ALICES       |                                                      |  1.71(109) [ 1.42,  1.27,  1.54,  1.64,  1.72,      ]* 
AF           | 0.013(05) [0.009, 0.023, 0.013, 0.009, 0.019,      ]*| -4.95(1526) [-12.76, -17.43, -11.56, -5.00, -10.93,      ]  
PIE          | 0.043(15) [0.056, 0.019, 0.043, 0.026, 0.058,      ] | -0.83(520) [-3.90, -0.04, -5.68, -1.22, -2.65,      ]  
MFMF-M/D     | 0.018(75) [0.073, 0.009, 0.017,      , 0.018, 0.210] | -1.85(203) [-0.84, -2.70, -1.11,      , -2.50, -3.14]  




In [37]:
# Summary by median over all values (median=True is also the default).
print_results(median=True)

             |   Closure |     log p 
-------------------------------------
2D summaries |           |  0.81(185)  
ALICES       |           |  1.71(109)* 
AF           | 0.013(05)*| -4.95(1526)  
PIE          | 0.043(15) | -0.83(520)  
MFMF-M/D     | 0.018(75) | -1.85(203)  


In [38]:
# Summary by mean instead of median.
print_results(median=False)

             |   Closure |     log p 
-------------------------------------
2D summaries |           |  0.49(185)  
ALICES       |           |  1.52(109)* 
AF           | 0.015(05)*| -11.53(1526)  
PIE          | 0.040(15) | -2.70(520)  
MFMF-M/D     | 0.065(75) | -2.06(203)  


In [14]:
# Same table as LaTeX rows: "&" column separators, "\\" row ends, "\midrule" as divider.
# NOTE(review): this cell shows an older execution count (In [14]) than the cells above,
# so its stored output may be stale -- re-run before using the table.
print_results(latex=True)

             &   Closure &     log p \\
\midrule
2D summaries & & \textbf{ 0.81}\,\textcolor{dark-gray}{(185)} \\
ALICES       & &  \\
AF           & &  \\
PIE          & \textbf{0.043}\,\textcolor{dark-gray}{(15)}& -0.83\,\textcolor{dark-gray}{(520)} \\
MFMF-M/D     & &  \\
