# perfectns paper results

This notebook contains the code used to make perfect nested sampling results and plots with `perfectns`. For more information see the dynamic nested sampling paper ([Higson et al., 2017](https://arxiv.org/abs/1704.03459)) and the [perfectns module](https://github.com/ejhigson/perfectns).

The results tables needed to run this notebook are cached in the `perfectns_results` folder, so it should not take very long to run. You can also calculate the nested sampling runs data yourself using `make_perfectns_runs.py`, although this is more computationally intensive. All the code uses `numpy` random seeding by default, so the results in the paper should be reproducible.

# Set everything up

In [None]:
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import perfectns.priors as priors
import perfectns.likelihoods as likelihoods
import perfectns.settings
import perfectns.estimators as e
import perfectns.plots
import perfectns.results_tables
import nestcheck.diagnostics_tables
import nestcheck.pandas_functions
import nestcheck.parallel_utils
%matplotlib inline

# Plot geometry and fonts, chosen to match the paper's LaTeX template
# -------------------------------------------------------------------
# NB A4 paper is 8.27 × 11.69 inches (=210 × 297 mm)
# Font (=caption font): \OT1/ptm/m/n/10 (= times size 10)
# Footnote font: \OT1/cmr/m/n/10
# Abstract font: \OT1/cmr/m/n/10.95
fontsize = 8.5  # matplotlib default is 10
textwidth = 6.85066 * 0.99  # make 1% smaller to ensure everything fits
textheight = 9.2144 * 0.99  # make 1% smaller to ensure everything fits
colwidth = 3.30719
matplotlib.rc('text', **{'usetex': True})
matplotlib.rc('font', **{'family': 'serif',
                         'serif': 'Times New Roman',
                         'size': fontsize})

# Estimator objects
# -----------------

# Full set of estimators evaluated when calculating results
estimator_list = [e.LogZ(), e.ParamMean(), e.ParamSquaredMean()]
estimator_list += [e.ParamCred(prob) for prob in (0.5, 0.84)]
estimator_list += [e.RMean()]
estimator_list += [e.RCred(prob) for prob in (0.5, 0.84)]
# Subset of estimators shown in plots and tables, plus their LaTeX names
# (the names are used as DataFrame column labels in later cells)
ests_to_plot_objects = [
    e.LogZ(),
    e.ParamMean(),
    e.ParamCred(0.5),
    e.ParamCred(0.84),
    e.RMean(),
    e.RCred(0.5),
]
ests_to_plot = [estimator.latex_name for estimator in ests_to_plot_objects]
# String substitutions passed as str_map when printing LaTeX tables
latex_str_map = {
    'mathrm{log}': 'log',
    'std': 'St.Dev.\\',
    'St.Dev. ': 'St.Dev.\\ ',
    'None': '',
}

# Plot analytic relative posterior mass in log X

In [None]:
# Shared inputs for both analytic posterior mass figures
logx = np.linspace(-35, 0, 200)
dim_list = [2, 10]
prior = perfectns.priors.Gaussian(10)
# Each figure compares a different set of likelihoods
gaussian_and_exp_power = [perfectns.likelihoods.Gaussian(1),
                          perfectns.likelihoods.ExpPower(1, 2),
                          perfectns.likelihoods.ExpPower(1, 0.75)]
gaussian_and_cauchy = [perfectns.likelihoods.Gaussian(1),
                       perfectns.likelihoods.Cauchy(1)]
figure_specs = [('plots/an_weights_3like.pdf', gaussian_and_exp_power),
                ('plots/an_weights_2like.pdf', gaussian_and_cauchy)]
for save_path, likelihoods_list in figure_specs:
    fig = perfectns.plots.plot_rel_posterior_mass(
        likelihoods_list, prior, dim_list, logx, figsize=(textwidth, 2))
    # adjust manually as matplotlib autorescaling is not working well here
    fig.subplots_adjust(left=0.05, right=0.99, bottom=0.21, top=0.99)
    fig.savefig(save_path)

# Plot nlive as a function of log X

In [None]:
# Settings shared by all the nlive plots; only the likelihood changes per plot
nlive_settings = perfectns.settings.PerfectNSSettings()
nlive_settings.n_dim = 10
nlive_settings.prior = priors.Gaussian(10)
nlive_settings.ninit = 50
nlive_settings.nlive_const = 500


def save_nlive_plot(likelihood, dynamic_goals, save_path, **plot_kwargs):
    """Plot nlive against log X for one likelihood and save the figure.

    Sets ``nlive_settings.likelihood`` to ``likelihood`` (the module-level
    settings object is mutated), calls ``perfectns.plots.plot_dynamic_nlive``
    with ``save=False, load=False`` and the paper figure size, applies the
    fixed subplot margins and writes the figure to ``save_path``.

    Parameters
    ----------
    likelihood:
        Likelihood object assigned to ``nlive_settings.likelihood``.
    dynamic_goals: list
        Dynamic goals passed as the first argument of ``plot_dynamic_nlive``.
    save_path: str
        Output file path for ``fig.savefig``.
    **plot_kwargs:
        Extra keyword arguments forwarded to ``plot_dynamic_nlive``
        (e.g. ``ymax``, ``logx_min``, ``tuned_dynamic_ps``).

    Returns
    -------
    fig: the figure returned by ``plot_dynamic_nlive``.
    """
    nlive_settings.likelihood = likelihood
    fig = perfectns.plots.plot_dynamic_nlive(
        dynamic_goals, nlive_settings, save=False, load=False,
        figsize=(textwidth, 2.2), **plot_kwargs)
    # adjust manually as matplotlib autorescaling is not working well here
    fig.subplots_adjust(left=0.085, right=0.99, bottom=0.185, top=0.975)
    fig.savefig(save_path)
    return fig


# One entry per figure: (likelihood, dynamic goals, output path, extra kwargs)
nlive_plot_specs = [
    # Gaussian
    (likelihoods.Gaussian(1), [None, 0, 0.25, 1],
     'plots/nlive_gaussian.pdf',
     {'ymax': 3000, 'logx_min': -35}),
    # Exp Power b=2
    (likelihoods.ExpPower(1, power=2), [None, 0, 0.25, 1],
     'plots/nlive_exp_power_2.pdf',
     {'ymax': 5000, 'logx_min': -40}),
    # Exp Power b=3/4
    (likelihoods.ExpPower(1, power=0.75), [None, 0, 0.25, 1],
     'plots/nlive_exp_power_0_75.pdf',
     {'ymax': 2500, 'logx_min': -30}),
    # Cauchy with tuned param: G=1 appears twice, the second time tuned
    (likelihoods.Cauchy(1), [None, 0, 1, 1],
     'plots/nlive_cauchy_tuned.pdf',
     {'ymax': 2000, 'logx_min': -40,
      'tuned_dynamic_ps': [False, False, False, True]}),
]
for likelihood, dynamic_goals, save_path, plot_kwargs in nlive_plot_specs:
    fig = save_nlive_plot(likelihood, dynamic_goals, save_path, **plot_kwargs)

# Dynamic results tables

This cell produces the results tables showing the efficiency gain from dynamic nested sampling as `pandas` DataFrames. It builds two lists: `dynamic_results_list` holds the full results (including mean values for each estimator), and `paper_format_df_list` contains the results tables shown in the paper.

Optionally, you can install `texunc` ([https://github.com/ejhigson/texunc](https://github.com/ejhigson/texunc)) to print the data in the LaTeX format used in the paper.

In [None]:
# Settings shared by all the results tables; only the likelihood changes
tab_settings = perfectns.settings.PerfectNSSettings()
tab_settings.ninit = 50
tab_settings.nlive_const = 500
tab_settings.prior = priors.Gaussian(prior_scale=10)
tab_settings.n_dim = 10


def print_paper_latex_table(results_df):
    """Print ``results_df`` in LaTeX format using the optional texunc package.

    Calls ``texunc.print_latex_df`` with ``min_dp=1`` and the notebook's
    ``latex_str_map``. If texunc is not installed (ImportError), prints a
    hint instead of failing, so the notebook still runs without it.

    Parameters
    ----------
    results_df: pandas.DataFrame
        Paper-format results table to print.
    """
    try:
        import texunc
        texunc.print_latex_df(results_df, min_dp=1, str_map=latex_str_map)
    except ImportError:
        print('Install texunc to get the results tables in the format used in the paper LaTeX file.')


dynamic_results_list = []
paper_format_df_list = []
# Gaussian and exp power results
# ------------------------------
for likelihood in [likelihoods.Gaussian(1), likelihoods.ExpPower(1, 2), likelihoods.ExpPower(1, 0.75)]:
    tab_settings.likelihood = likelihood
    print(type(likelihood).__name__, likelihood.__dict__)
    dynamic_results_list.append(perfectns.results_tables.get_dynamic_results(
        5000, [0, 0.25, 1], estimator_list, tab_settings, save=True, load=True, cache_dir='perfectns_results'))
    paper_format_df_list.append(nestcheck.pandas_functions.paper_format_efficiency_gain_df(
        dynamic_results_list[-1][['samples'] + ests_to_plot]))
    print_paper_latex_table(paper_format_df_list[-1])
# Cauchy results (with tuning)
# ----------------------------
tab_settings.likelihood = likelihoods.Cauchy(1)
dynamic_results_list.append(perfectns.results_tables.get_dynamic_results(
    1000, [0, 1, 1], estimator_list, tab_settings, save=True, load=True, cache_dir='perfectns_results',
    tuned_dynamic_ps=[False, False, True]))
# The Cauchy table shows the ParamSquaredMean column in place of the
# ParamCred(0.5) column; look the two names up once rather than per element.
param_cred_name = e.ParamCred(0.5).latex_name
param_squared_mean_name = e.ParamSquaredMean().latex_name
ests_to_plot_cauchy = [est.replace(param_cred_name, param_squared_mean_name)
                       for est in ests_to_plot]
paper_format_df_list.append(nestcheck.pandas_functions.paper_format_efficiency_gain_df(
    dynamic_results_list[-1][['samples'] + ests_to_plot_cauchy]))
print_paper_latex_table(paper_format_df_list[-1])

# Dynamic results KDE plot

In [None]:
# Build a DataFrame (kde_df) holding, for each sampling method and each
# estimator in estimator_list, the array of estimator values over all runs.
# Values are loaded from cache_dir when possible and recalculated otherwise.
import os
import perfectns.nested_sampling

kde_settings = perfectns.settings.PerfectNSSettings()
kde_settings.ninit = 50
kde_settings.nlive_const = 500
kde_settings.likelihood = likelihoods.Gaussian(likelihood_scale=1)
kde_settings.prior = priors.Gaussian(prior_scale=10)
kde_settings.n_dim = 10
# load runs and get results values
values_dict = {}
samp_max = {}  # NOTE(review): not used anywhere else in this cell
n_run = 5000
# dynamic_goal=None is labelled 'standard' below; the others are labelled $G=...$
dynamic_goals = [None, 0, 0.25, 1]
keys = []
cache_dir = 'perfectns_results'
for dynamic_goal in dynamic_goals:
    kde_settings.dynamic_goal = dynamic_goal
    # Cache file name for the values DataFrame, derived from the current settings
    save_name = os.path.join(cache_dir, 'values_' + kde_settings.save_name()) + '_{}reps'.format(n_run)
    try:
        # Try loading cached values dataframe without generating runs
        # NOTE(review): presumably passing None as the run list raises TypeError
        # when there is no cached file to load — confirm against nestcheck
        values_df = nestcheck.diagnostics_tables.estimator_values_df(
            None, estimator_list, save_name=save_name, save=True, load=True)
    except TypeError:
        # If the values df is not cached, recalculate it
        print(('dynamic_goal={}: I could not load the cached values DataFrame so am generating new data.'
               .format(dynamic_goal)))
        # To load the same runs used for efficiency gain tables, note that the dynamic ones
        # use a max number of samples determined by the standard ones and have "_sampmax"
        # appended to their file names. To load these we
        # need to set settings.n_samples_max to not None and tell get_run_data not to check
        # it is the exact right value
        # NOTE(review): n_samples_max is never reset, so after the first cache miss
        # for a dynamic goal it stays set on later iterations, including when
        # save_name is computed at the top of the loop — confirm this is intended
        if dynamic_goal is not None:
            kde_settings.n_samples_max = 1  # comment this out if you are generating new runs rather than loading!
        run_list = perfectns.nested_sampling.get_run_data(
            kde_settings, n_run, cache_dir=cache_dir, check_loaded_settings=False, load=True, save=True)
        values_df = nestcheck.diagnostics_tables.estimator_values_df(
            run_list, estimator_list, save_name=save_name, save=True, load=True)
    # Row label used in kde_df (and hence in the KDE plot legend)
    if dynamic_goal is None:
        key = 'standard'
    else:
        key = '$G=' + str(dynamic_goal) + '$'
    keys.append(key)
    values_dict[key] = values_df
# Plot KDE
# One row per sampling method, one column per estimator; each cell holds an array.
# Assumes column i of each values DataFrame corresponds to estimator_list[i] — TODO confirm
kde_df = pd.DataFrame(index=keys)
for i, est in enumerate(estimator_list):
    kde_df[est.latex_name] = [values_dict[dg].values[:, i] for dg in keys]
kde_df

In [None]:
# Plot kernel density estimates of each estimator's values for every sampling
# method in kde_df, with the true value marked on each panel.
import nestcheck.plots
kde_ests = ests_to_plot_objects
true_values = e.get_true_estimator_values(kde_ests, kde_settings)
# Where no true value is available (NaN), fall back to the mean of the
# calculated values over all rows of kde_df for that estimator
for i, val in enumerate(true_values):
    if np.isnan(val):
        true_values[i] = np.mean(np.stack(kde_df[kde_ests[i].latex_name].values))
# Half-widths of the x axis range around each true value (one per estimator)
widths = [0.75, 0.06, 0.07, 0.1, 0.09, 0.10]
xlims = {est.latex_name: [true_values[i] - widths[i], true_values[i] + widths[i]]
         for i, est in enumerate(kde_ests)}
fig = nestcheck.plots.kde_plot_df(kde_df[ests_to_plot], xlims=xlims,
                                  figsize=(textwidth, 2.5), nrows=2, legend=True,
                                  legend_kwargs={'loc': 'right', 'bbox_to_anchor': (1, 0.6)})
# Mark the true value on each panel; zip stops at the shorter sequence, so any
# axes beyond the estimators plotted are left untouched instead of raising
for ax, true_value in zip(fig.axes, true_values):
    ax.axvline(true_value, color='black', linestyle=':')
fig.subplots_adjust(hspace=0.6, left=0.02, right=0.83, bottom=0.17, top=0.99)
fig.savefig('plots/gaussian_dynamic_results_kde.pdf')

# Efficiency Gain Line Plots

In [None]:
# (dimension, prior scale) pairs: first vary the prior scale in 2 dimensions,
# then vary the dimension with the prior scale fixed at 10
prior_scales_2d = [0.1, 0.2, 0.5, 1, 2, 5, 10, 20, 50, 100]
dims_scale_10 = [2, 5, 10, 20, 50, 100, 200, 500, 1000]
dim_scale_list = ([(2, scale) for scale in prior_scales_2d]
                  + [(dim, 10) for dim in dims_scale_10])
line_plot_likelihoods = [likelihoods.Gaussian(1),
                         likelihoods.ExpPower(1, 2),
                         likelihoods.ExpPower(1, 0.75)]
line_plot_settings = perfectns.settings.PerfectNSSettings(nlive_const=200, ninit=20)
results_in = perfectns.results_tables.merged_dynamic_results(
    dim_scale_list, line_plot_likelihoods, line_plot_settings, estimator_list,
    cache_dir='perfectns_results', load=True, save=True)
# Keep only the efficiency gain rows
results_in = results_in.xs('std efficiency gain', level='calculation type')
results_in

In [None]:
# Likelihood labels in order of first appearance in the results index.
# Index.unique() preserves order, so the panel order is the same on every run
# (iterating over a set of strings is not reproducible between kernels).
likelihood_names = list(results_in.index.get_level_values('likelihood').unique())[:3]
# Gain vs dimension
# -----------------
ymax = {'Exp Power, $b=2$': 80, 'Exp Power, $b=\\frac{3}{4}$': 10}  # upper limits of y axis
fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(textwidth, 5))
fig.subplots_adjust(hspace=0)
results = results_in.xs(10, level=r'$\sigma_\pi$')
logz_name = e.LogZ().latex_name
for i, likelihood in enumerate(likelihood_names):
    df = results.xs(likelihood, level='likelihood')
    ax = axes[i]
    ax.set_title(likelihood.replace(',', ' '), y=0.7)
    for col in ests_to_plot:
        # Show the G=0 results for log Z and the G=1 results for everything else
        dyn_set = '$G=0$' if col == logz_name else '$G=1$'
        ser = df.xs('dynamic ' + dyn_set, level='dynamic settings')[col]
        ser = ser.sort_index()
        ser.xs('value', level='result type').plot.line(
            label=dyn_set + ': ' + col,
            yerr=ser.xs('uncertainty', level='result type'), ax=ax)
    if i == 0:
        ax.legend(ncol=3, bbox_to_anchor=(0.5, 1), loc='lower center')
    # make sure the labels of plots above and below each other don't clash:
    # use the hand-picked upper limit if there is one, else the top tick
    upper = ymax.get(likelihood)
    if upper is None:
        upper = ax.get_yticks()[-1]
    ax.set_ylim([0, upper])
    if i != 0:
        # drop the top tick so it cannot overlap the panel above
        labels = ax.get_yticks().tolist()
        ax.set_yticks(labels[:-1])
    ax.tick_params(top=True, direction='inout')
    ax.yaxis.set_ticks_position('both')
    ax.set_xscale('log')
    ax.set_ylim(bottom=0)
    ax.set_xlim(left=1)
    if i != 2:
        # only the bottom panel shows x tick labels
        ax.set_xticklabels([])
    ax.set_ylabel('efficiency gain')
savename = 'plots/eff_gain_dim.pdf'
fig.savefig(savename, bbox_inches='tight', pad_inches=0)

In [None]:
# Gain vs prior_scale
# -------------------
# Efficiency gain for log Z (G=0) and the parameter mean (G=1) in 2 dimensions
logz_name = e.LogZ().latex_name
results = results_in.xs(2, level=r'dimension $d$')
results = results[[logz_name, e.ParamMean().latex_name]]
# Create the axes explicitly and pass them to every plot call, rather than
# relying on pyplot's implicit "current axes" state
fig, ax = plt.subplots(figsize=(textwidth, 2))
for col in results.columns:
    if col == logz_name:
        dyn_set = '$G=0$'
        linestyle = '-'
    else:
        dyn_set = '$G=1$'
        linestyle = '--'
    for likelihood in likelihood_names:
        df = results.xs(likelihood, level='likelihood')
        ser = df.xs('dynamic ' + dyn_set, level='dynamic settings')[col]
        ser = ser.sort_index()
        ser.xs('value', level='result type').plot.line(
            label=likelihood + ', ' + dyn_set + ': ' + col,
            yerr=ser.xs('uncertainty', level='result type'),
            linestyle=linestyle, ax=ax)
ax.legend(ncol=2, bbox_to_anchor=(0.5, 1), loc='lower center')
ax.set_xscale('log')
ax.tick_params(top=True, direction='inout')
ax.yaxis.set_ticks_position('both')
ax.set_ylim(bottom=0)
ax.set_ylabel('efficiency gain')
savename = 'plots/eff_gain_prior_scale.pdf'
fig.savefig(savename, bbox_inches='tight', pad_inches=0)

In [None]:
# Display the rows of the efficiency gain results (results_in) for which the
# 'dimension $d$' index level equals 1000
results_in.xs(1000, level=r'dimension $d$')

# Bootstrap results table

In [None]:
# Settings for the bootstrap results table
bs_settings = perfectns.settings.PerfectNSSettings()
bs_attribute_values = [
    ('likelihood', likelihoods.Gaussian(likelihood_scale=1)),
    ('prior', priors.Gaussian(prior_scale=10)),
    ('n_dim', 3),
    ('nlive_const', 200),
    ('ninit', 20),
    ('dynamic_goal', 1),
]
for attribute, value in bs_attribute_values:
    setattr(bs_settings, attribute, value)
# Sample every dimension (must be set after n_dim)
bs_settings.dims_to_sample = bs_settings.n_dim
# Same estimators as the main tables, but with the radial estimators
# constructed with from_theta=True
estimator_list_bs = [e.LogZ(), e.ParamMean(), e.ParamSquaredMean()]
estimator_list_bs += [e.ParamCred(prob) for prob in (0.5, 0.84)]
estimator_list_bs += [e.RMean(from_theta=True)]
estimator_list_bs += [e.RCred(prob, from_theta=True) for prob in (0.5, 0.84)]
bootstrap_results_table = perfectns.results_tables.get_bootstrap_results(
    5000, 200, estimator_list_bs, bs_settings,
    n_run_ci=500,
    n_simulate_ci=1000,
    add_sim_method=False,
    cred_int=0.95,
    ninit_sep=True,
    parallel=True,
    cache_dir='perfectns_results',
    save=True,
    load=True,
)
# Print in the paper's LaTeX format if the optional texunc package is installed
try:
    import texunc
    texunc.print_latex_df(bootstrap_results_table[ests_to_plot],
                          min_dp_no_error=1, str_map=latex_str_map)
except ImportError:
    print('Install texunc to get the results tables in the format used in the paper LaTeX file.')
bootstrap_results_table[ests_to_plot]