# Diagnostics Paper Code

Code for making the figures and results tables in "Diagnostic tests for nested sampling calculations" ([Higson et al., 2018](https://arxiv.org/abs/1804.06406)). See the paper for a detailed explanation of the plots and tables produced.

Requirements:
* Nested sampling runs saved in 'chains' - these can be generated with `generate_data.py`;
* `nestcheck`;
* `getdist` (<https://github.com/cmbant/getdist>).

Optional:
* `texunc` (<https://github.com/ejhigson/texunc>) can be used for automatically printing results tables in LaTeX format.

Before running, install `diagnostics` with pip so the utility functions can be accessed, e.g. by running the following command from within the current directory:

    $ pip install . --user

Figure 1 in the paper was produced with `tikz` and is not included. Output plots will be saved to `plots`.

# Set up for making plots

### Imports and settings

In [None]:
import functools
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import nestcheck.plots
import nestcheck.diagnostics_tables
import nestcheck.estimators as e
import diagnostics.results_plots
import diagnostics.data_loading
import diagnostics.results_utils
import diagnostics.settings
%matplotlib inline
# np.random.seed(0)
# pd.set_option('display.width', 200)
# matplotlib.rc('text', usetex=True)

# Page geometry and fonts of the LaTeX template
# ---------------------------------------------
# All lengths are in inches. For reference, A4 paper is 8.27 × 11.69 inches
# (=210 × 297 mm). Fonts reported by the template:
#   Font:          \T1/ntxtlf/m/n/9
#   Caption font:  \T1/ntxtlf/m/n/8
#   Footnote font: \T1/ntxtlf/m/n/8
#   Abstract font: \T1/ntxtlf/m/n/10
# The text width and height are made 1% smaller to ensure everything fits.
textwidth, textheight = 6.97522 * 0.99, 9.43869 * 0.99
colwidth = 3.32153

# Compare the active matplotlib parameters with the paper's
# ---------------------------------------------------------
# Differences are only reported, not applied; assign
# matplotlib.rcParams[key] = value inside the loop to enforce them.
matplotlib_settings = {
    'text.usetex': True,
    'font.family': ['serif'],
    'font.serif': ['Times New Roman'],
    'font.size': 8,
}
for key, value in matplotlib_settings.items():
    current = matplotlib.rcParams.get(key)
    if current != value:
        print('{}={} - the paper plots use {}'.format(key, current, value))

# Global variables shared by the cells below
# ------------------------------------------
# Other likelihood names which appear in this notebook's history:
# 'Gaussian shell', 'Rastrigin', 'Rosenbrock'
likelihood_list = ['Gaussian', 'LogGamma mix']

# Fig 2: Triangle Plots

In [None]:
# Load the runs to plot
# ---------------------
# Figures 2, 3 and 4 all use the same runs
nrun = 2
ndim, nlive, nrepeats = diagnostics.settings.get_default_nd_nl_nr()
plot_run_dict = diagnostics.data_loading.get_run_list_dict(
    ['LogGamma mix'], nrun, nlive=nlive, nrepeats=nrepeats, ndim=ndim)
# Draw one triangle plot per likelihood and export it to the plots directory
# --------------------------------------------------------------------------
for likelihood_name in ['LogGamma mix']:
    runs = plot_run_dict[likelihood_name]
    param_limits = diagnostics.settings.get_default_lims(likelihood_name, ndim)
    gplot = diagnostics.results_plots.getdist_plot(
        runs, width_inch=colwidth, params=3, param_limits=param_limits)
    save_path = 'plots/triangle_{}_{}nlive_{}nrepeats.pdf'.format(
        likelihood_name.replace(' ', '_'), nlive, nrepeats)
    gplot.export(save_path)

# Fig 3: Posterior distributions with bootstrap uncertainty estimates

In [None]:
# Get runs for plotting
# ---------------------
# The same runs are used for Figures 2, 3 and 4
nrun = 2
# Fetch the default settings explicitly, as the Figure 2 and Figure 4 cells
# do, so this cell does not rely on variables left behind by another cell.
ndim, nlive, nrepeats = diagnostics.settings.get_default_nd_nl_nr()
plot_run_dict = diagnostics.data_loading.get_run_list_dict(
    likelihood_list, nrun, nlive=nlive, nrepeats=nrepeats, ndim=ndim)
# Settings
# --------
n_simulate = 50  # 500 for paper
npoints = 100  # 200 for paper

for name in likelihood_list:
    # Functions of theta to plot for this likelihood; 'LogGamma mix' gets two
    # extra ones.
    lab_list = [diagnostics.results_utils.param_latex_name(1),
                diagnostics.results_utils.param_latex_name(2)]
    if name == 'LogGamma mix':
        lab_list.append(diagnostics.results_utils.param_latex_name(3))
        lab_list.append(r'$|\theta|$')
    # The labels are consumed two at a time (one figure per pair)
    assert len(lab_list) % 2 == 0
    default_lims = diagnostics.settings.get_default_lims(name)
    for i in range(len(lab_list) // 2):
        labels = lab_list[2 * i:2 * (i + 1)]
        # The same root names both the cache file and the saved figure
        cache_root = 'bs_param_dists_{}_{}nlive_{}nrepeats_{}sim_{}points_{}'.format(
            name.replace(' ', '_'), nlive, nrepeats, n_simulate, npoints, i + 1)
        # NOTE(review): r'$|\theta|$' already contains dollar signs, so the
        # wrapping below produces '$$|\theta|$$' for that label - confirm
        # this is intended.
        fig = nestcheck.plots.bs_param_dists(
            plot_run_dict[name], labels=['${}$'.format(lab) for lab in labels],
            fthetas=diagnostics.results_utils.get_ftheta_list(labels),
            ftheta_lims=[default_lims[lab] for lab in labels],
            cache='cache/' + cache_root, figsize=(colwidth, 1),
            n_simulate=n_simulate, rasterize_contours=True,
            nx=npoints, ny=npoints)
        # Adjust figure plot size manually for best use of latex space as
        # plt.tight_layout() does not work with the colorbars
        fig.subplots_adjust(left=0.05, right=0.93, bottom=0.35, top=0.98)
        # Only contours are rasterised so dpi does not need to be that high
        fig.savefig('plots/' + cache_root + '.pdf', dpi=300)

# Fig 4: Parameter logX Diagram

In [None]:
# Get runs for plotting
# ---------------------
# The same runs are used for Figures 2, 3 and 4
nrun = 2
ndim, nlive, nrepeats = diagnostics.settings.get_default_nd_nl_nr()
plot_run_dict = diagnostics.data_loading.get_run_list_dict(
    likelihood_list, nrun, nlive=nlive, nrepeats=nrepeats, ndim=ndim)

# Settings
# --------
n_simulate = 50  # 500 for paper
npoints = 100  # 100 for paper

for name in likelihood_list:
    # Functions of theta to plot for this likelihood; 'LogGamma mix' gets two
    # extra ones.
    labels = [diagnostics.results_utils.param_latex_name(1),
              diagnostics.results_utils.param_latex_name(2)]
    if name == 'LogGamma mix':
        labels.append(diagnostics.results_utils.param_latex_name(3))
        labels.append(r'$|\theta|$')
    # Figure height is 0.8 inches per label plus a fixed 0.4 inches; that
    # fixed part is the bottom margin, expressed as a fraction of the height.
    figsize = (colwidth, 0.4 + 0.8 * len(labels))
    bottom_margin = 0.4 / figsize[1]
    # Only the first run is used for these likelihoods
    if name in ['Gaussian', 'Gaussian shell']:
        run_list = plot_run_dict[name][:1]
    else:
        run_list = plot_run_dict[name]
    # The same root names both the cache file and the saved figure
    cache_root = 'param_logx_diagram_{}_{}nlive_{}nrepeats_{}sim_{}points'.format(
        name.replace(' ', '_'), nlive, nrepeats, n_simulate, npoints)
    default_lims = diagnostics.settings.get_default_lims(name)
    # Copy each pair of limits so that the adjustment below cannot modify
    # whatever object the settings helper handed back (re-running the cell
    # would otherwise risk shifting the limit again).
    ftheta_lims = [list(default_lims[lab]) for lab in labels]
    if name == 'LogGamma mix':
        ftheta_lims[0][0] -= 10
    # n_simulate is passed explicitly: it appears in cache_root, so the
    # calculation must actually use it rather than the library default.
    fig = nestcheck.plots.param_logx_diagram(
        run_list, labels=labels,
        fthetas=diagnostics.results_utils.get_ftheta_list(labels),
        ftheta_lims=ftheta_lims,
        cache='cache/' + cache_root, rasterize_contours=True,
        logx_min=diagnostics.settings.default_logx_min(name, ndim),
        figsize=figsize, npoints=npoints, n_simulate=n_simulate)
    fig.subplots_adjust(left=0.16, right=0.985, bottom=bottom_margin, top=0.995)
    fig.savefig('plots/' + cache_root + '.pdf', dpi=300)

# Figure 5 and Tables: Implementation error bar chart and results tables

### Get error results data frame

In [None]:
# Settings
# --------
nrun = 10
n_simulate = 100
nd_nl_nr_list = [diagnostics.settings.get_default_nd_nl_nr()]

# Load (or compute and save) the summary results data frame
# ---------------------------------------------------------
errors_df_in = diagnostics.data_loading.get_results_df(
    likelihood_list,
    nd_nl_nr_list,
    n_simulate=n_simulate,
    nrun=nrun,
    summary=True,
    save=True,
    load=True,
    thread_pvalue=False,
    bs_stat_dist=False,
    include_rmse=True,
    include_true_values=True)

# Keep only the estimator columns shown in the bar chart and tables
# -----------------------------------------------------------------
# Estimators previously listed here but disabled:
#   functools.partial(e.param_mean, param_ind=3), e.r_mean,
#   e.param_squared_mean
estimators_bar = [
    e.logz,
    e.param_mean,
    functools.partial(e.param_mean, param_ind=1),
    functools.partial(e.param_mean, param_ind=2),
]
estimator_names_bar = [e.get_latex_name(est) for est in estimators_bar]
errors_df = errors_df_in.copy()[estimator_names_bar]
# Take the cross-section at the default value of each settings index level
for level, default in zip(['ndim', 'nlive', 'nrepeats'],
                          diagnostics.settings.get_default_nd_nl_nr()):
    errors_df = errors_df.xs(default, level=level)

In [None]:
# Show the unfiltered results data frame as this cell's output
errors_df_in # .columns[0]

### Plot bar chart (Fig 5)

In [None]:
# Draw the ratio bar chart from the filtered errors data frame, set the
# margins by hand and save it under a name recording the settings used
savename = 'plots/imp_error_test_{}runs_{}sim_{}nlive_{}nrepeats.pdf'.format(
    nrun, n_simulate, nlive, nrepeats)
fig = diagnostics.results_plots.ratio_bar_plot(
    errors_df, figsize=(colwidth, 1.2))
fig.subplots_adjust(left=0.18, right=0.61, bottom=0.2, top=0.96)
fig.savefig(savename)

In [None]:
# Make results tables
# -------------------
# pandas is imported here because the notebook's import cell does not
# provide `pd`, which the final pd.concat call needs.
import pandas as pd

# texunc is optional: without it the tables are stored unformatted. The
# import is tried once, outside the loop, so that an ImportError raised
# while formatting a table is not silently swallowed.
try:
    import texunc
except ImportError:
    texunc = None

# Replacements applied to row names and text when printing the LaTeX tables
str_map = {'true values': 'Correct Result',
           'values mean': 'Mean Calculation Result',
           'values std': r'Values St.Dev.\ ',
           'values rmse': 'Values RMSE',
           'bootstrap std mean': r'Bootstrap St.Dev.\ ',
           'implementation std': r'Implementation St.Dev.\ ',
           'implementation std frac': r'Imp St.Dev. / Val St.Dev.\ ',
           r'Implementation St.Dev.\  frac': r'Imp St.Dev./Val St.Dev.\ ',
           'mathrm{log}': 'log'}
df_dict = {}
for likelihood_name in likelihood_list:
    df = errors_df.xs(likelihood_name, level='likelihood')
    label_root = likelihood_name.lower().replace(' ', '_')
    label = 'tab:' + label_root
    # Raw strings throughout: '\C' is not a valid escape sequence, so the
    # first fragment must be raw to keep the backslash without a warning.
    caption = (r'As in \Cref{tab:gaussian} but for calculations using the ' + likelihood_name +
               r' likelihood~\eqref{equ:' + label_root + r'}.')
    if texunc is not None:
        df = texunc.print_latex_df(
            df, min_dp=1, min_dp_no_error=4, str_map=str_map, caption=caption,
            caption_above=False, label=label, zero_dp_ints=False)
        df.index = [str_map[ind] for ind in list(df.index)]
    # Also store the (formatted, when texunc is available) df
    df_dict[likelihood_name] = df
# Display all the tables together, keyed by likelihood name
pd.concat(df_dict)

# Figures 7 and 8:  Line plots of errors vs nlive and nrepeats

### Get a data frame of results

In [None]:
# Settings
# --------
nrun_lp = 10
n_simulate_lp = 100

# Values of ndim, nlive and nrepeats to get results for
nd_nl_nr_list = diagnostics.settings.get_nd_nl_nr_list(
    nd_list=[2, 4, 10], nl_list=[10, 20, 50], nr_list=[1, 2, 5, 10])

# Load (or compute and save) the summary results data frame
# ---------------------------------------------------------
results_df_in = diagnostics.data_loading.get_results_df(
    likelihood_list,
    nd_nl_nr_list,
    n_simulate=n_simulate_lp,
    nrun=nrun_lp,
    summary=True,
    save=True,
    load=True,
    thread_pvalue=False,
    bs_stat_dist=False,
    include_rmse=True,
    include_true_values=True)

### Make line plots

In [None]:
# One set of line plots per settings variable: that variable goes on the
# x axis while the other two are fixed at their default values.
xaxes = ['ndim', 'nlive', 'nrepeats']
defaults = diagnostics.settings.get_default_nd_nl_nr()
for x_to_plot in xaxes:
    print(x_to_plot)
    df_temp = results_df_in
    for x_to_remove, default in zip(xaxes, defaults):
        if x_to_remove == x_to_plot:
            continue
        df_temp = df_temp.xs(default, level=x_to_remove)
    for est in [e.logz, e.param_mean]:
        estimator_name = e.get_latex_name(est)
        print('\n', estimator_name)
        fig = diagnostics.results_plots.get_line_plot(
            df_temp, estimator_name, figsize=(colwidth, 3))
        # Margins are set manually; see (https://matplotlib.org/devdocs
        # /api/_as_gen/matplotlib.pyplot.subplots_adjust.html)
        fig.subplots_adjust(
            left=0.17, right=0.995, bottom=0.07, top=0.99, hspace=0)
        savename = 'plots/line_{}_{}runs_{}sim_{}.pdf'.format(
            x_to_plot, nrun_lp, n_simulate_lp, est.__name__)
        fig.savefig(savename)

### Make legend in separate file

In [None]:
# Stand-alone legend for the line plots. The lines themselves are drawn at
# y=-1 and the lower y limit is then set to 0, so only the legend is visible.
fig = plt.figure(figsize=(textwidth, 0.3))
ax = plt.gca()
for label in ('result values', 'mean bootstrap estimate', 'implementation error'):
    ax.plot([0, 1], [-1, -1], label=label)
ax.legend(ncol=3, loc='center')
ax.set_ylim(bottom=0)
ax.axis('off')
fig.savefig('plots/line_plot_legend.pdf')

# Figures 9, 11, 12 and 13: Histograms

In [None]:
# Settings
# --------
# Historical note: first draft used nrun=500, nlive=1000, nrepeats=5.
# nlive and nrepeats are taken from the default settings below; the values
# previously hard-coded here (nlive=200, nrepeats=10) were overwritten before
# being used, so they have been removed.
nrun = 100
n_simulate = 100  # 100 for paper
ndim, nlive, nrepeats = diagnostics.settings.get_default_nd_nl_nr()
# Load (or compute and save) the non-summary results, including the thread
# p-values and bootstrap statistical distances plotted in the histograms
vals_df_in = diagnostics.data_loading.get_results_df(
    likelihood_list, [(ndim, nlive, nrepeats)], n_simulate=n_simulate, nrun=nrun,
    summary=False, save=True, load=True, thread_pvalue=True, bs_stat_dist=True)

In [None]:
# x axis limits for each calculation type
xlims = {
    'thread ks pvalue': [0, 1],
    'thread ks distance': [0, 0.3],
    'thread earth mover distance': [0, 0.2],
    'thread energy distance': [0, 0.4],
    'bootstrap ks pvalue': [0, 1],
    'bootstrap ks distance': [0, 1],
    'bootstrap earth mover distance': [0, 0.25],
    'bootstrap energy distance': [0, 0.8],
}

# One histogram per (estimator, calculation type) pair; the estimator's
# position in the list (counting from 1) goes into the file name.
hist_estimators = [e.param_mean, functools.partial(e.param_mean, param_ind=1)]
hist_calcs = ['thread ks pvalue',
              'bootstrap ks distance',
              'bootstrap earth mover distance',
              'bootstrap energy distance']
for i, est in enumerate(hist_estimators):
    est_name = e.get_latex_name(est)
    for calc in hist_calcs:
        print(calc)
        fig = diagnostics.results_plots.hist_plot(
            vals_df_in, calc, est_name,
            likelihood_list=likelihood_list,
            xlim=xlims[calc], nbin=50, figsize=(colwidth, 1))
        fig.subplots_adjust(left=0.096, right=0.985, bottom=0.29, top=0.98)
        savename = 'plots/hist_{}_theta{}_{}runs_{}sim_{}nlive_{}nrepeats.pdf'.format(
            calc.replace(' ', '_'), i + 1, nrun, n_simulate, nlive, nrepeats)
        fig.savefig(savename)

# Figure 10: 1d KDE distributions of bootstrap sampling error estimates

In [None]:
# Settings
# --------
# Historical note: first draft used nlive=1000, nsimulate=100.
# nrun is not set in this cell: it keeps whatever value an earlier cell
# assigned.
ndim, nlive, nrepeats = diagnostics.settings.get_default_nd_nl_nr()
n_simulate = 100  # paper uses 1000
kde_run_dict = diagnostics.data_loading.get_run_list_dict(
    likelihood_list, nrun, nlive=nlive, nrepeats=nrepeats, ndim=ndim)

# Estimators to get bootstrap values for, and their LaTeX names
estimator_list_1dkde = [
    e.logz,
    e.param_mean,
    functools.partial(e.param_mean, param_ind=1),
]
estimator_names_1dkde = [e.get_latex_name(est) for est in estimator_list_1dkde]

# One data frame of bootstrap values per likelihood
bs_dict = {
    likelihood_name: nestcheck.diagnostics_tables.bs_values_df(
        kde_run_dict[likelihood_name], estimator_list_1dkde,
        estimator_names_1dkde, n_simulate)
    for likelihood_name in likelihood_list
}

In [None]:
# Plot and save the 1d KDE figure for each likelihood, using the rows at
# positions 2 and 3 of its bootstrap values data frame
for likelihood_name in likelihood_list:
    print(likelihood_name)
    bs_df = bs_dict[likelihood_name].iloc[[2, 3]]
    fig = nestcheck.plots.kde_plot_df(
        bs_df, figsize=(textwidth * 0.5, 1.2), num_xticks=3)
    fig.subplots_adjust(left=0.03, right=0.97, bottom=0.35, top=0.99)
    save_path = ('plots/1dkde_' + likelihood_name.replace(' ', '_') + '_'
                 + str(nlive) + 'nlive_'
                 + str(nrepeats) + 'nrepeats_'
                 + str(n_simulate) + 'sim.pdf')
    fig.savefig(save_path)