# Particle physics results

## Setup

In [1]:
import sys
import numpy as np
from sklearn.metrics import roc_auc_score
from sklearn.neighbors import KernelDensity
from itertools import product


## Load results

In [2]:
n_runs = 10
n_chains = 4
n_trueparams = 3

In [14]:
algo_filenames = []
algo_additionals = []
algo_labels = []
algo_dividers = []
algo_dims = []

def add_algo(filename, add, label, dim=""):
    algo_filenames.append(filename)
    algo_additionals.append(add)
    algo_labels.append(label)
    algo_dims.append(dim)
    
    
def add_divider():
    algo_dividers.append(len(algo_filenames))

add_algo("flow", "_may", "AF", "40d")
add_algo("pie", "_conditionalmanifold_may", "PIE (conditional manifold)", "40d")
add_algo("pie", "_may", "PIE (unconditional manifold)", "40d")
add_algo("mf", "_sequential_may", "MFMF-M/D (sequential)", "40d")
add_algo("mf", "_alternate_may", "MFMF-M/D (alternating)", "40d")
add_algo("emf", "_sequential_may", "MFMFE-M/D (sequential)", "40d")
add_algo("emf", "_alternate_may", "MFMFE-M/D (alternating)", "40d")

add_divider()

add_algo("flow", "_scandal_may", "AF-SCANDAL", "40d")
add_algo("pie", "_conditionalmanifold_scandal_may", "PIE-SCANDAL (conditional manifold)", "40d")
add_algo("pie", "_scandal_may", "PIE-SCANDAL (unconditional manifold)", "40d")
add_algo("mf", "_sequential_scandal_may", "MFMF-M/SCANDAL (sequential)", "40d")
add_algo("mf", "_alternate_scandal_may", "MFMF-M/SCANDAL (alternating)", "40d")
add_algo("emf", "_sequential_scandal_may", "MFMFE-M/SCANDAL (sequential)", "40d")
add_algo("emf", "_alternate_scandal_may", "MFMFE-M/SCANDAL (alternating)", "40d")

add_divider()

add_algo("flow", "_may", "2D summaries (AF)", "2d")
add_algo("flow", "_scandal_may", "2D summaries (SCANDAL)", "2d")
add_algo("alices", "_may", "ALICES")

n_algos = len(algo_filenames)


In [15]:
def load(name, shape, numpyfy=True, chains=1, result_dir="../data/results"):
    all_results = []
    
    for algo_filename, algo_add, algo_dim in zip(algo_filenames, algo_additionals, algo_dims):
        algo_results = []
            
        for run in range(n_runs):
            run_str = "" if run == 0 else "_run{}".format(run)
            
            for trueparam in range(n_trueparams):
                trueparam_str = "" if trueparam == 0 else "_trueparam{}".format(trueparam)

                try:
                    this_result = np.load(
                        "{}/{}_{}_lhc{}{}{}_{}{}.npy".format(
                            result_dir, algo_filename, "2" if algo_dim == "2d" else "14",
                            algo_dim, algo_add, run_str, name, trueparam_str
                        )
                    )
                    if (not numpyfy) or (shape is None) or np.product(this_result.shape) == np.product(shape):
                        algo_results.append(this_result.reshape(shape))
                    else:
                        algo_results.append(np.nan*np.ones(shape))

                except FileNotFoundError as e:
                    # print(e)
                    if shape is None:
                        algo_results.append(None)
                    else:
                        algo_results.append(np.nan*np.ones(shape))
            
        all_results.append(algo_results)
    
    if numpyfy:
        all_results = np.array(all_results, dtype=np.float)
        all_results = all_results.reshape([all_results.shape[0], n_runs, n_trueparams] + list(shape))
        
    return all_results


model_gen_x = load("samples", None, numpyfy=False)
model_gen_closure = load("samples_manifold_distance", (10000,))



In [16]:
def load_mcmc(name, shape, numpyfy=True, result_dir="../data/results"):
    all_results = []
    
    for algo_filename, algo_add, algo_dim in zip(algo_filenames, algo_additionals, algo_dims):
        algo_results = []
            
        for run in range(n_runs):
            run_str = "" if run == 0 else "_run{}".format(run)
            
            for trueparam in range(n_trueparams):
                trueparam_str = "" if trueparam == 0 else "_trueparam{}".format(trueparam)
            
                for chain in range(n_chains):
                    chain_str = "" if chain == 0 else "_chain{}".format(chain)

                    try:
                        this_result = np.load(
                            "{}/{}_{}_lhc{}{}{}_{}{}{}.npy".format(
                                result_dir, algo_filename, "2" if algo_dim == "2d" else "14",
                                algo_dim, algo_add, run_str, name, trueparam_str, chain_str
                            )
                        )
                        if (not numpyfy) or (shape is None) or np.product(this_result.shape) == np.product(shape):
                            algo_results.append(this_result.reshape(shape))
                        else:
                            algo_results.append(np.nan*np.ones(shape))

                    except FileNotFoundError as e:
                        # print(e)
                        if shape is None:
                            algo_results.append(None)
                        else:
                            algo_results.append(np.nan*np.ones(shape))
            
        all_results.append(algo_results)
    
    all_results = np.array(all_results, dtype=np.float)
    all_results = all_results.reshape([all_results.shape[0], n_runs, n_trueparams, n_chains] + list(shape))
        
    return all_results


model_posterior_samples = load_mcmc("posterior_samples", (750, 2,))
model_posterior_samples.shape  # (algo, run, true param id, chain, sample, theta component)


(17, 10, 3, 4, 750, 2)

## Calculate metrics

In [17]:
min_logp = -100.
model_gen_mean_closure = np.mean(model_gen_closure, axis=(2,3))
model_gen_mean_closure.shape


(17, 10)

In [18]:
bandwidth = 0.1
true_param_points = np.array([[0.,0.], [0.5, 0.], [-1., -1.]])

model_true_log_posteriors = []

for algo, run, trueparam in product(range(n_algos), range(n_runs), range(n_trueparams)):
    mcmcs = model_posterior_samples[algo, run, trueparam].reshape((-1, 2))
    mcmcs = mcmcs[np.all(np.isfinite(mcmcs), axis=-1)]
    
    if len(mcmcs) == 0:
        model_true_log_posteriors.append(np.nan)
        continue
        
    kde = KernelDensity(kernel="gaussian", bandwidth=bandwidth)
    kde.fit(mcmcs)
    model_true_log_posteriors.append(kde.score(true_param_points[trueparam].reshape((1, 2))))

model_true_log_posteriors = np.mean(np.array(model_true_log_posteriors).reshape((n_algos, n_runs, n_trueparams)), axis=-1)
model_true_log_posteriors.shape


(17, 10)

## Outlier removal

In [19]:
def mean_err_without_outliers(data, remove=1):
    shape = list(data.shape)[:-1]
    data.reshape((-1, data.shape[-1]))
    
    means, errors = [], []
    
    for data_ in data:
        data_ = data_[np.isfinite(data_)]
        print(data_)
        if not len(data_) > 0:
            means.append(np.nan)
            errors.append(np.nan)
            continue
            
        if len(data_) > 2*remove + 1:
            for _ in range(remove):
                data_ = np.delete(data_, np.argmin(data_))
                data_ = np.delete(data_, np.argmax(data_))

        means.append(np.mean(data_))
        errors.append(np.std(data_) / len(data_)**0.5)
        
    return np.array(means).reshape(shape), np.array(errors).reshape(shape)
    
    
model_true_log_posteriors_mean, model_true_log_posteriors_std = mean_err_without_outliers(
    model_true_log_posteriors
)
model_gen_mean_closure_mean, model_gen_mean_closure_std = mean_err_without_outliers(model_gen_mean_closure)


[-14.82486563  -4.52275264 -10.19093009  -2.53592674  -2.33303727
 -14.76304872  -1.88213227  -2.68034498 -40.25120166 -43.30690621]
[]
[ -2.24110031 -17.85794527  -1.01861077  -1.46823725  -0.65175892
  -5.36812132  -1.58775059  -1.2288322   -2.25506705  -1.64728059]
[-2.16202101 -2.37498722 -1.18569417 -3.07452503 -1.30295209 -2.95724486
 -1.84078884 -3.18184704 -1.77053317 -4.79860184]
[-1.9146434  -2.50330305 -1.40244514 -2.30897597 -2.09041243 -2.10304909
 -2.33426144 -1.6369627  -2.46093558 -1.96940002]
[-2.29384509 -2.51023905 -2.50112291 -2.25225944 -6.60091795 -5.48628632
 -0.87997906 -2.01979283 -1.15689937 -2.35705979]
[-1.70915098 -1.92311406 -1.9780074  -5.59040486 -1.94534863 -3.14892095
 -1.96835788 -4.50457473 -1.89568776 -2.01129295]
[-0.00198507 -0.03823265  0.23025201 -0.00114512]
[]
[ 0.50301821  0.3043977  -0.93118446  0.50915962]
[ 0.1295876   0.36147325 -0.14480063 -0.26317358 -0.84098886]
[]
[ 0.62945544 -0.08982008  0.31441028]
[ 0.27421211 -0.10140903]
[-0.723

## Best metrics

In [20]:
best_closure, best_posterior = -1, -1

best_closure = np.nanargmin(model_gen_mean_closure_mean)
print(algo_labels[best_closure])
    
best_posterior = np.nanargmax(model_true_log_posteriors_mean)
print(algo_labels[best_posterior])


AF
PIE-SCANDAL (unconditional manifold)


## Print result table

In [21]:
def print_results(
    l_label=max([len(l) for l in algo_labels]), l_mean=6, l_err=5, latex=False, after_decs=(3,2)
):
    # Number of digits
    l_result = l_mean + 2 + l_err
    l_total = l_label + 1 + 2 * (3 + l_result)
        
    # Divider
    empty_result = "" if latex else " "*(l_result + 1)
    col_divider = "&" if latex else "|"
    line_end = r"\\" if latex else ""
    block_divider = r"\midrule" if latex else "-"*l_total
    
    # Number formatting
    def _f(val, err, after_dec, best=False):
        if not np.any(np.isfinite(val)):
            return empty_result
        
        result = "{:>{}.{}f}".format(val, l_mean, after_dec)
        if latex and best:
            result = r"\textbf{" + result + "}"
            
        err_str = "({:>{}.{}f})".format(err, l_err, after_dec)
        if latex:
            result += r"\,\textcolor{darkgray}{" + err_str + "}"
        else:
            result += err_str
            
        result += "*" if not latex and best else " "
        
        if latex:
            result = result.replace("-", "$-{}$")
            result = result.replace("darkgray", "dark-gray")
        return result
    
    
    # Header
    print(
        "{2:<{0}.{0}s} {5} {3:>{1}.{1}s} {5} {4:>{1}.{1}s} {6}".format(
            l_label, l_result, "", "Closure", "log p", col_divider, line_end
        )
    )
    print(block_divider)

    # Iterate over methods
    for i, (label, closure, closure_err, posterior, posterior_err) in enumerate(zip(
        algo_labels,
        model_gen_mean_closure_mean,
        model_gen_mean_closure_std,
        model_true_log_posteriors_mean,
        model_true_log_posteriors_std
    )):
        # Divider
        if i in algo_dividers:
            print(block_divider)
            
        # Print results
        print(
            "{1:<{0}.{0}s} {4} {2}{4} {3} {5}".format(
                l_label, label,
                _f(closure, closure_err, after_decs[0], i==best_closure),
                _f(posterior, posterior_err, after_decs[1], i==best_posterior),
                col_divider, line_end
            )
        )


In [22]:
print_results()

                                     |       Closure |         log p 
---------------------------------------------------------------------
AF                                   |  0.014(0.002)*| -11.51( 4.22)  
PIE (conditional manifold)           |               |                
PIE (unconditional manifold)         |  0.036(0.006) |  -2.10( 0.46)  
MFMF-M/D (sequential)                |  0.047(0.011) |  -2.33( 0.23)  
MFMF-M/D (alternating)               |  0.156(0.014) |  -2.10( 0.09)  
MFMFE-M/D (sequential)               |  0.056(0.010) |  -2.57( 0.42)  
MFMFE-M/D (alternating)              |  0.056(0.011) |  -2.42( 0.31)  
---------------------------------------------------------------------
AF-SCANDAL                           |  0.072(0.001) |  -0.00( 0.00)  
PIE-SCANDAL (conditional manifold)   |               |                
PIE-SCANDAL (unconditional manifold) |  0.160(0.008) |   0.40( 0.07)* 
MFMF-M/SCANDAL (sequential)          |  0.150(0.002) |  -0.09( 0.09)  
MFMF-M/SC

In [23]:
print_results(latex=True)

                                     &       Closure &         log p \\
\midrule
AF                                   & \textbf{ 0.014}\,\textcolor{dark-gray}{(0.002)} & $-{}$11.51\,\textcolor{dark-gray}{( 4.22)}  \\
PIE (conditional manifold)           & &  \\
PIE (unconditional manifold)         &  0.036\,\textcolor{dark-gray}{(0.006)} &  $-{}$2.10\,\textcolor{dark-gray}{( 0.46)}  \\
MFMF-M/D (sequential)                &  0.047\,\textcolor{dark-gray}{(0.011)} &  $-{}$2.33\,\textcolor{dark-gray}{( 0.23)}  \\
MFMF-M/D (alternating)               &  0.156\,\textcolor{dark-gray}{(0.014)} &  $-{}$2.10\,\textcolor{dark-gray}{( 0.09)}  \\
MFMFE-M/D (sequential)               &  0.056\,\textcolor{dark-gray}{(0.010)} &  $-{}$2.57\,\textcolor{dark-gray}{( 0.42)}  \\
MFMFE-M/D (alternating)              &  0.056\,\textcolor{dark-gray}{(0.011)} &  $-{}$2.42\,\textcolor{dark-gray}{( 0.31)}  \\
\midrule
AF-SCANDAL                           &  0.072\,\textcolor{dark-gray}{(0.001)} &  $-{}$0.00\,\t

## Individual run results

In [25]:
l_label=max([len(l) for l in algo_labels])
l_mean=6
after_decs=2

# How to format the numbers
l_result = 3 + n_runs*l_mean + (n_runs - 1)*2
l_total = l_label + 4 + l_result

# Divider
empty_result = " "*(l_result + 1)
col_divider = "|"
line_end = ""
block_divider = "-"*l_total
    
def _f(val, after_dec, best=False):
    if not np.any(np.isfinite(val)):
        return empty_result
    result = " [{:>{}.{}f}, ".format(np.nanmean(val[0]), l_mean, after_dec)
    for i in range(1, n_runs - 1):
        result += "{:>{}.{}f}, ".format(np.nanmean(val[i]), l_mean, after_dec)
    result += "{:>{}.{}f}]".format(np.nanmean(val[-1]), l_mean, after_dec)
    result = result.replace("nan", "   ")
    result += "*" if best else " "
    return result

# Print closure results
print(
    "{2:<{0}.{0}s} {4} {3:>{1}.{1}s} {5}".format(
        l_label, l_result, "", "Closure", col_divider, line_end
    )
)
print(block_divider)

for i, (label, closure) in enumerate(zip(algo_labels, model_gen_mean_closure)):
    # Divider
    if i in algo_dividers:
        print(block_divider)
            
    # Print results
    print("{1:<{0}.{0}s} {3} {2} {4}".format(
        l_label, label, _f(closure, after_decs, i==best_closure), col_divider, line_end
    ))

# Print posterior results
print("")
print(
    "{2:<{0}.{0}s} {4} {3:>{1}.{1}s} {5}".format(
        l_label, l_result, "", "Log posterior", col_divider, line_end
    )
)
print(block_divider)

for i, (label, posterior) in enumerate(zip(algo_labels, model_true_log_posteriors)):
    # Divider
    if i in algo_dividers:
        print(block_divider)
            
    # Print results
    print("{1:<{0}.{0}s} {3} {2} {4}".format(
        l_label, label, _f(posterior, after_decs, i==best_posterior), col_divider, line_end
    ))


                                     |                                                                           Closure 
-------------------------------------------------------------------------------------------------------------------------
AF                                   |  [  0.01,   0.02,   0.01,   0.01,   0.02,   0.02,   0.01,   0.02,   0.01,   0.01]* 
PIE (conditional manifold)           |                                                                                    
PIE (unconditional manifold)         |  [  0.05,   0.02,   0.04,   0.02,   0.06,   0.01,   0.02,   0.06,   0.10,   0.02]  
MFMF-M/D (sequential)                |  [  0.07,   0.01,   0.02,   0.03,   0.02,   0.21,   0.09,   0.09,   0.02,   0.03]  
MFMF-M/D (alternating)               |  [  0.17,   0.09,   0.13,   0.23,   0.12,   0.04,   0.23,   0.16,   0.15,   0.19]  
MFMFE-M/D (sequential)               |  [  0.08,   0.13,   0.05,   0.07,   0.03,   0.10,   0.02,   0.07,   0.01,   0.02]  
MFMFE-M/D (alterna

