# Power manifold results

## Setup

In [1]:
import sys
import numpy as np
from sklearn.metrics import roc_auc_score


## Load results

In [2]:
n_runs = 3

In [3]:
algo_filenames = []
algo_additionals = []
algo_labels = []
algo_dividers = []

def add_algo(filename, add, label):
    algo_filenames.append(filename)
    algo_additionals.append(add)
    algo_labels.append(label)
    
    
def add_divider():
    algo_dividers.append(len(algo_filenames))
    

add_algo("flow", "_small", "EF")
add_algo("flow", "_small_long", "EF (long)")
add_algo("flow", "_small_shallow_long", "EF (shallow, long)")

add_divider()
add_algo("pie", "_small", "PIE") 
add_algo("pie", "_small_pieepsilon01", "PIE (eps = 0.1)") 
add_algo("pie", "_small_pieepsilon003", "PIE (eps = 0.03)") 
add_algo("pie", "_small_pieepsilon0003", "PIE (eps = 0.003)") 
add_algo("pie", "_small_pieepsilon0001", "PIE (eps = 0.001)") 
add_algo("pie", "_small_pieepsilon00003", "PIE (eps = 0.0003)") 
add_algo("pie", "_small_pieepsilon00001", "PIE (eps = 0.0001)") 
add_algo("pie", "_small_long", "PIE (long)") 
add_algo("pie", "_small_shallow_long", "PIE (shallow, long)") 

add_divider()
add_algo("mf", "_small", "MLF-S")
add_algo("mf", "_small_morenll", "MLF-S (large lambda)")
add_algo("mf", "_small_long", "MLF-S (long)")
add_algo("mf", "_small_shallow_long", "MLF-S (shallow, long)")

add_divider()
add_algo("mf", "_small_alternate", "MLF-A")
add_algo("mf", "_small_alternate_wdecay", "MLF-A (wd)")
add_algo("mf", "_small_alternate_long", "MLF-A (long)")
add_algo("mf", "_small_alternate_wdecay_long", "MLF-A (long, wd)")
add_algo("mf", "_small_alternate_shallow_long", "MLF-A (shallow, long)")
add_algo("mf", "_small_alternate_wdecay_shallow_long", "MLF-A (shallow, long, wd)")

add_divider()
add_algo("gamf", "_small_largebs", "MLF-OT")
add_algo("gamf", "_small_wdecay_largebs", "MLF-OT (wd)")
add_algo("gamf", "_small_hugebs", "MLF-OT (5k bs)") 
add_algo("gamf", "_small_wdecay_hugebs", "MLF-OT (5k bs, wd)") 
add_algo("gamf", "_small_largebs_long", "MLF-OT (long)") 
add_algo("gamf", "_small_wdecay_largebs_long", "MLF-OT (long, wd)") 
add_algo("gamf", "_small_largebs_shallow_long", "MLF-OT (shallow, long)") 
add_algo("gamf", "_small_wdecay_largebs_shallow_long", "MLF-OT (shallow, long, wd)") 

add_divider()
add_algo("gamf", "_small_largebs", "MLF-OTA")
add_algo("gamf", "_small_wdecay_largebs", "MLF-OTA (wd)")
add_algo("gamf", "_small_hugebs", "MLF-OTA (5k bs)") 
add_algo("gamf", "_small_wdecay_hugebs", "MLF-OTA (5k bs, wd)") 
add_algo("gamf", "_small_largebs_long", "MLF-OTA (long)") 
add_algo("gamf", "_small_wdecay_largebs_long", "MLF-OTA (long, wd)") 
add_algo("gamf", "_small_largebs_shallow_long", "MLF-OTA (shallow, long)") 
add_algo("gamf", "_small_wdecay_largebs_shallow_long", "MLF-OTA (shallow, long, wd)") 

add_divider()
add_algo("emf", "_small", "EMLF-S")
add_algo("mef", "_small_morenll", "EMLF-S (large lambda)")
add_algo("emf", "_small_long", "EMLF-S (long)")
add_algo("emf", "_small_shallow_long", "EMLF-S (shallow, long)")

add_divider()
add_algo("emf", "_small_alternate", "EMLF-A")
add_algo("emf", "_small_alternate_wdecay", "EMLF-A (wd)")
add_algo("emf", "_small_alternate_long", "EMLF-A (long)")
add_algo("emf", "_small_alternate_wdecay_long", "EMLF-A (long, wd)")
add_algo("emf", "_small_alternate_shallow_long", "EMLF-A (shallow, long)")
add_algo("emf", "_small_alternate_wdecay_shallow_long", "EMLF-A (shallow, long, wd)")


In [4]:
def load(name, shape, numpyfy=True, result_dir="../data/results"):
    all_results = []
    
    for algo_filename, algo_add in zip(algo_filenames, algo_additionals):
        algo_results = []
            
        for run in range(n_runs):
            run_str = "" if run == 0 else "_run{}".format(run)
            try:
                algo_results.append(np.load(
                    "{}/{}_2_power{}{}_{}.npy".format(
                        result_dir, algo_filename, algo_add, run_str, name
                    )
                ))
            except FileNotFoundError as e:
                # print(e)
                if shape is None:
                    algo_results.append(None)
                else:
                    algo_results.append(np.nan*np.ones(shape))
            
        all_results.append(algo_results)
    
    if numpyfy:
        try:
            all_results = np.array(all_results, dtype=np.float)
        except Exception as e:
            print(name)
            print(len(all_results))
            print([len(res) for res in all_results])
            print([[len(r) for r in res] for res in all_results])
            raise
        
    return all_results


model_gen_x = load("samples", None, numpyfy=False)
model_gen_logp = load("samples_likelihood", (10000,))
model_gen_distance = load("samples_manifold_distance", (10000,))
model_test_logp = load("model_log_likelihood_test", (101, 1000,))
model_test_reco_error = load("model_reco_error_test", (1000,))
model_ood_logp = load("model_log_likelihood_ood", (101, 1000,))
model_ood_reco_error = load("model_reco_error_ood", (1000,))
model_posterior_samples = load("posterior_samples", (5000, 1), numpyfy=False)
model_mmds = load("mmd", (1,))


In [5]:
def load_truth(name, samples=True):
    if samples:
        return np.asarray([
            np.load("../data/samples/power/{}{}.npy".format(
                name, run_str
            ))
            for run_str in [""] + ["_run{}".format(i) for i in range(1, n_runs)]
        ])
    else:
        return np.asarray([
            np.load("../data/results/truth_power{}_{}.npy".format(
                run_str, name
            ))
            for run_str in [""] + ["_run{}".format(i) for i in range(1, n_runs)]
        ])

test_x = load_truth("x_test", True)
test_distance = np.zeros((test_x.shape[0], 1))
test_logp = load_truth("true_log_likelihood_test", False)
true_posterior_samples = load_truth("posterior_samples", False)

param_grid = np.linspace(-1, 1, 101)


## Calculate metrics

In [6]:
n_observed = 20
min_logp = -100.
max_distance = 10.

model_gen_mean_logp = np.mean(np.clip(model_gen_logp, min_logp, None), axis=2)
model_gen_mean_distance = np.mean(np.clip(model_gen_distance, None, max_distance), axis=2)
model_observed_nll = -2. * np.sum(model_test_logp[:,:,:,:n_observed], axis=-1)

test_mean_logp = np.mean(np.clip(test_logp, min_logp, None), axis=1)
test_mean_distance = np.mean(np.clip(test_distance, None, max_distance), axis=1)
true_observed_nll = -2. * np.sum(test_logp[:,:,:n_observed], axis=-1)


In [7]:
def calculate_roc_auc(x0, x1):
    assert x0.shape == x1.shape
    old_shape = x0.shape[:-1]
    x0 = x0.reshape(-1, x0.shape[-1])
    x1 = x1.reshape(-1, x1.shape[-1])
    
    aucs = []
    for x0_, x1_ in zip(x0, x1):
        if not np.all(np.isfinite(np.hstack((x0_, x1_)))):
            aucs.append(np.nan)
            continue
            
        auc = roc_auc_score(
            np.hstack((np.zeros(x0_.shape[0], dtype=np.int), np.ones(x1_.shape[0], dtype=np.int))),
            np.hstack((x0_, x1_)),
        )
        auc_flipped = roc_auc_score(
            np.hstack((np.zeros(x0_.shape[0], dtype=np.int), np.ones(x1_.shape[0], dtype=np.int))),
            - np.hstack((x0_, x1_)),
        )
        aucs.append(max(auc, auc_flipped))
        
    aucs = np.asarray(aucs)
    aucs = aucs.reshape(old_shape)
    return aucs


model_auc_logp = calculate_roc_auc(model_test_logp[:,:,50,:], model_ood_logp[:,:,50,:])
model_auc_err = calculate_roc_auc(model_test_reco_error, model_ood_reco_error)
model_auc_use_err = (model_auc_err > model_auc_logp)
model_auc = np.maximum(model_auc_err, model_auc_logp)




## Best metrics

In [19]:
best_mmd = np.nanargmin(np.nanmean(model_mmds, axis=1))
print(algo_labels[best_mmd])

best_dist = np.nanargmin(np.nanmean(model_gen_mean_distance, axis=1))
print(algo_labels[best_dist])

best_auc = np.nanargmin(np.nanmean(model_auc, axis=1))
print(algo_labels[best_dist])

MLF-A (long)
EF (long)
EF (long)


  """Entry point for launching an IPython kernel.
  after removing the cwd from sys.path.
  import sys


In [None]:
best_mmd = np.argmin(np.nanmean(model_mmds, axis=1))
print(algo_labels[best_mmd])

## Print metrics

In [36]:
def print_results(
    include_err=False, include_n_runs=False, l_label=max([len(l) for l in algo_labels]), l_mean=7, l_err=5
):
    # How to format the numbers
    l_result = l_mean + int(include_err) * (3 + l_err) + int(include_n_runs) * 4
    l_total = l_label + 1 + 3 * (3 + l_result)
    
    def _f(val):
        if not np.any(np.isfinite(val)):
            return " "*l_result
        
        result = "{:>{}.{}f}".format(np.nanmean(val), l_mean, l_mean - 2)
        if include_err:
            result += " ({:>{}.{}f})".format(np.nanstd(val), l_err, l_err - 2)
        if include_n_runs:
            result += " [{:1n}]".format(np.sum(np.isfinite(val)))
        return result
        
    
    # Header
    print(
        "{2:<{0}.{0}s} | {3:>{1}.{1}s} | {4:>{1}.{1}s} | {5:>{1}.{1}s} ".format(
            l_label, l_result, "", "Distance", "OOD AUC", "Posterior MMD"
        )
    )
    print("-"*l_total)

    # Iterate over methods
    for i, (label, dist, mmd, auc) in enumerate(zip(algo_labels, model_gen_mean_distance, model_mmds, model_auc)):
        # Divider
        if i in algo_dividers:
            print("-"*l_total)
            
        # Print results
        print("{1:<{0}.{0}s} | {2}{5}| {3}{6}| {4}{7}".format(
            l_label, label,
            _f(dist), _f(auc),  _f(mmd),
            "*" if i == best_dist else " ", "*" if i == best_auc else " ", "*" if i == best_mmd else " "
        ))


In [37]:
print_results()

                            | Distanc | OOD AUC | Posteri 
----------------------------------------------------------
EF                          | 0.02480 | 0.89290 | 0.10395 
EF (long)                   | 0.00773*| 0.93600 | 0.25326 
EF (shallow, long)          | 0.01359 | 0.91945 | 0.12238 
----------------------------------------------------------
PIE                         | 0.03746 | 0.84231 | 0.13966 
PIE (eps = 0.1)             | 0.01971 | 0.87777 | 0.12551 
PIE (eps = 0.03)            | 0.01295 | 0.87020 | 0.05324 
PIE (eps = 0.003)           | 0.01605 | 0.89039 | 0.11495 
PIE (eps = 0.001)           | 0.08156 | 0.79135 | 0.07989 
PIE (eps = 0.0003)          | 0.01421 | 0.87598 | 0.09799 
PIE (eps = 0.0001)          | 0.01920 | 0.89822 | 0.16706 
PIE (long)                  | 0.01677 | 0.91979 | 0.12031 
PIE (shallow, long)         | 0.00966 | 0.88591 | 0.09557 
----------------------------------------------------------
MLF-S                       | 0.37499 | 0.59537 | 0.1534

In [38]:
print_results(include_err=True)

                            |        Distance |         OOD AUC |   Posterior MMD 
----------------------------------------------------------------------------------
EF                          | 0.02480 (0.020) | 0.89290 (0.038) | 0.10395 (0.077) 
EF (long)                   | 0.00773 (0.003)*| 0.93600 (0.007) | 0.25326 (0.153) 
EF (shallow, long)          | 0.01359 (0.003) | 0.91945 (0.017) | 0.12238 (0.069) 
----------------------------------------------------------------------------------
PIE                         | 0.03746 (0.022) | 0.84231 (0.018) | 0.13966 (0.164) 
PIE (eps = 0.1)             | 0.01971 (0.008) | 0.87777 (0.039) | 0.12551 (0.116) 
PIE (eps = 0.03)            | 0.01295 (0.007) | 0.87020 (0.034) | 0.05324 (0.003) 
PIE (eps = 0.003)           | 0.01605 (0.007) | 0.89039 (0.043) | 0.11495 (0.055) 
PIE (eps = 0.001)           | 0.08156 (0.030) | 0.79135 (0.041) | 0.07989 (0.015) 
PIE (eps = 0.0003)          | 0.01421 (0.003) | 0.87598 (0.032) | 0.09799 (0.077) 
PIE 