# Planck Results Plotting

Notebook for plotting Figures 12 and 13 in "nestcheck: diagnostic tests for nested sampling calculations" ([Higson et al., 2018](https://arxiv.org/abs/1804.06406)), which use data from the *Planck* survey. For more details about the likelihood used, see the paper and references therein. The likelihood is public and so can be used to replicate the results in the paper. However note that this is computationally expensive.

### Set up

This notebook currently assumes the output files from nested sampling runs using the *Planck* likelihood are stored in ``chains_planck`` with file roots of the form

    '{}_repeats_{}'.format(nrepeats, index)

### Imports

In [None]:
import warnings
import functools
import pandas as pd
import matplotlib.ticker
import matplotlib.pyplot as plt
import nestcheck.diagnostics_tables
import nestcheck.estimators as e
import nestcheck.data_processing
import nestcheck.plots
import diagnostic.results_plots
%matplotlib inline

# Report matplotlib parameters which differ from those used in the paper
# ----------------------------------------------------------------------
matplotlib_settings = {
    'text.usetex': True,
    'font.family': ['serif'],
    'font.serif': ['Times New Roman'],
    'font.size': 8,
}
for key, value in matplotlib_settings.items():
    current = matplotlib.rcParams.get(key)
    if current == value:
        continue
    # Only report the mismatch; the rcParams themselves are left untouched
    print('{}={} - the paper plots use {}'.format(key, current, value))

### Load data

In [None]:
# Map each nrepeats setting to its list of processed nested sampling runs
planck_dict = {}
# reran first batch due to accidentally using an old PolyChord version with
# bug, so these nrepeats values use run indexes 25-49 rather than 0-24
rerun_nrepeats = [1, 2, 3, 4, 5, 6, 7, 28]
for nrepeats in [1, 2, 3, 4, 5, 6, 7, 14, 21, 28, 35]:
    inds = list(range(25, 50)) if nrepeats in rerun_nrepeats else list(range(25))
    print('nrepeats={}'.format(nrepeats))
    file_roots = ['{}_repeats_{}'.format(nrepeats, run_ind) for run_ind in inds]
    with warnings.catch_warnings():
        # Suppress the 'for logl=' warnings only while the runs are loaded
        warnings.filterwarnings("ignore", message=r'for logl=')
        run_list = nestcheck.data_processing.batch_process_data(
            file_roots,
            base_dir='chains_planck',
            parallel=True,
            errors_to_handle=(ValueError, OSError))
    planck_dict[nrepeats] = run_list

### Load LaTeX paramnames

In [None]:
# Get the quantities we want to plot, and their latex names.
# Each non-empty line of the .paramnames file is split on tabs and the second
# field is used as the parameter's LaTeX label.
with open('chains_planck/1_repeats_1.paramnames', 'r') as paramnames_file:
    data = paramnames_file.read()
# Skip blank lines rather than blindly dropping the final element, so the last
# parameter is kept whether or not the file ends with a newline.
latex_names = [line.split('\t')[1] for line in data.split('\n') if line.strip()]
# First estimator is logZ; the rest are the means of the first 8 parameters.
estimator_list = [e.logz]
estimator_names = [e.get_latex_name(e.logz)]
for ind in range(8):
    latex_name = latex_names[ind].replace(r'{\rm{ln}}', r'\log ')
    estimator_list.append(functools.partial(e.param_mean, param_ind=ind))
    # Raw string: '\o' is not a valid escape sequence in a normal string literal
    estimator_names.append(r'$\overline{{{}}}$'.format(latex_name))
    # print names
    print('${}$'.format(latex_name))

### Get results DataFrame

In [None]:
# Compute (or load from cache) the error summary for each nrepeats setting and
# combine the summaries into a single DataFrame indexed by nrepeats.
n_simulate = 100
planck_results = {}
for key, run_list in planck_dict.items():
    # Label the cache with the number of runs actually summarised, instead of
    # relying on a loop variable left over from the data loading cell.
    # NOTE(review): the runs were loaded with errors_to_handle set, so
    # presumably run_list can hold fewer runs than were requested - confirm.
    save_name = 'cache/errors_df_planck_{}repeats_{}runs_{}sim'.format(
        key, len(run_list), n_simulate)
    planck_results[key] = nestcheck.diagnostics_tables.run_list_error_summary(
        run_list, estimator_list, estimator_names, n_simulate=n_simulate,
        save_name=save_name, save=True, load=True)
planck_df_in = pd.concat(planck_results)
# The outermost index level comes from the planck_results keys (nrepeats)
planck_df_in.index.names = ['nrepeats'] + list(planck_df_in.index.names[1:])
planck_df = planck_df_in
planck_df

### Make line plot (Figure 12)

In [None]:
calculation_types = ['bootstrap std mean', 'implementation std', 'values std']
# Fixed order so linestyles match the key, which is made as a separate pdf
linestyles = ['-', '--', ':', '-.']
line_plot_inds = [[0, 1, 2, 4], [3, 5, 6, 8]]
xticks = [2, 4, 10, 20, 40]
# Make the plot in two halves, each showing four of the estimators
for half_ind, plot_inds in enumerate(line_plot_inds):
    assert len(plot_inds) == 4
    # Keep only the rows with nrepeats >= 2 (first index level)
    nrepeats_mask = planck_df.index.get_level_values(0) >= 2
    df_temp = planck_df.loc[nrepeats_mask]
    df_temp.columns = estimator_names
    cols_to_plot = [estimator_names[ind] for ind in plot_inds]
    df_temp = df_temp[cols_to_plot]
    fig = diagnostic.results_plots.get_line_plot(
        df_temp,
        calculation_types,
        linestyles=linestyles,
        log_scale=True,
        left_margin=0.4,
        right_margin=0.06)
    for ax in fig.axes:
        # standard form numbers on y labels
        y_formatter = matplotlib.ticker.FormatStrFormatter('%.e')
        ax.get_yaxis().set_major_formatter(y_formatter)
        ax.set_xlim([2 * (35 / 40), 40])
        ax.set_xticks(xticks)
        ax.set_xticklabels(xticks)
    savename = 'plots/line_nrepeats_planck_25runs_{}sim_{}.pdf'.format(
        n_simulate, half_ind + 1).replace(' ', '_')
    fig.savefig(savename)

### Set up for color plots

In [None]:
# Plot settings
# -------------
nrun = 2
nrepeats = 1
colwidth = 3.32153 * 0.99
n_simulate = 500  # 500 for paper
npoints = 200  # 200 for paper
start = 2
# Use nrun consecutive runs from the chosen nrepeats batch
run_list = planck_dict[nrepeats][start:start + nrun]
# Get info about the parameters which have been plotted in the line plots
# NOTE(review): diagnostic.results_utils is used below, but only
# diagnostic.results_plots is imported explicitly by this notebook -
# presumably that import also loads results_utils; confirm.
param_names = []
lims_dict = {}
fthetas_dict = {}
cred_probs = [0.001, 0.999]
# Skip logZ (the first estimator) so ind lines up with the parameter index
for ind, est_name in enumerate(estimator_names[1:]):
    # Strip the overline and its braces to recover the plain parameter label
    name = est_name.replace(r'\overline', '')
    name = name.replace(r'${', '$').replace(r'}$', '$')
    param_names.append(name)
    fthetas_dict[name] = functools.partial(
        diagnostic.results_utils.component_value, ind=ind)
    # Get default limits for all params for quick plotting
    lims_dict[name] = [
        e.param_cred(run_list[0], probability=prob, param_ind=ind)
        for prob in cred_probs]
# Specify limits used for params in the paper
lims_dict.update({
    '$\\Omega_b h^2$': [0.0215, 0.0235],
    '$\\Omega_c h^2$': [0.11, 0.13],
    '$\\tau$': [0.0, 0.225],
    '$y_{\\rm cal}$': [0.9925, 1.0075],
})

print(param_names)
params_to_plot = [param_names[i] for i in [0, 1, 3, 7]]
params_to_plot

### BS param plot (Figure 13)

In [None]:
# The parameters are plotted in pairs, one figure per pair
assert len(params_to_plot) % 2 == 0
for plot_num, first in enumerate(range(0, len(params_to_plot), 2), start=1):
    labels = params_to_plot[first:first + 2]
    cache_root = 'bs_param_dists_planck_{}nrepeats_{}sim_{}points_{}'.format(
        nrepeats, n_simulate, npoints, plot_num)
    fthetas = [fthetas_dict[name] for name in labels]
    ftheta_lims = [lims_dict[name] for name in labels]
    fig = nestcheck.plots.bs_param_dists(
        run_list,
        labels=labels,
        fthetas=fthetas,
        ftheta_lims=ftheta_lims,
        cache='cache/' + cache_root,
        figsize=(colwidth, 1),
        n_simulate=n_simulate,
        rasterize_contours=True,
        nx=npoints,
        ny=npoints)
    # Use custom x ticks on the Omega_b h^2 axis only
    for ax_ind, name in enumerate(labels):
        if name == '$\\Omega_b h^2$':
            fig.axes[ax_ind].set_xticks([0.022, 0.023])
    # Adjust figure plot size manually for best use of latex space as
    # plt.tight_layout() does not work with the colorbars
    fig.subplots_adjust(left=0.05, right=0.93, bottom=0.35, top=0.98)
    # only contours are rasterised so dpi does not need to be that high
    fig.savefig('plots/' + cache_root + '.pdf', dpi=300)

### LogX plot

Note this was not included in the final version of the paper.

In [None]:
# Settings
# --------
print(params_to_plot)
# Figure height grows with the number of parameters plotted
fig_height = 0.4 + 0.8 * len(params_to_plot)
figsize = (colwidth, fig_height)
bottom_margin = 0.4 / figsize[1]
cache_root = 'logx_planck_{}nrepeats_{}sim_{}points'.format(
    nrepeats, n_simulate, npoints)
# Make figure
# -----------
logx_kwargs = {
    'labels': params_to_plot,
    'fthetas': [fthetas_dict[name] for name in params_to_plot],
    'ftheta_lims': [lims_dict[name] for name in params_to_plot],
    'cache': 'cache/' + cache_root,
    'rasterize_contours': True,
    'logx_min': -30,
    'figsize': figsize,
    'npoints': npoints,
    'thin': 0.25,
}
fig = nestcheck.plots.param_logx_diagram(run_list, **logx_kwargs)
# Layout adjustment and saving are left disabled:
# fig.subplots_adjust(left=0.16, right=0.985, bottom=bottom_margin, top=0.995)
# fig.savefig('plots/' + cache_root + '.pdf', dpi=400)