# Bootstrap distances to the future

Estimate uncertainty of distance to the future values per sample and model using the bootstrap of observed distances across time.

## Define inputs, outputs, and parameters

In [None]:
# Define inputs.
# NOTE(review): `snakemake` is never imported or assigned in this notebook; presumably
# Snakemake injects it when the notebook runs as part of a workflow rule -- confirm.
# Path to the table of model distances that is loaded with `pd.read_table`.
model_distances = snakemake.input.model_distances

# Define outputs.
# Path of the tab-delimited table of bootstrap p-values.
output_table = snakemake.output.output_table
# Paths of the bootstrap distance figures, one per sample.
bootstrap_figure_for_simulated_sample = snakemake.output.bootstrap_figure_for_simulated_sample
bootstrap_figure_for_natural_sample = snakemake.output.bootstrap_figure_for_natural_sample

# Define parameters.
# Number of bootstrap replicates used for every hypothesis test and bootstrap distribution.
n_bootstraps = snakemake.params.n_bootstraps

# Values of the `error_type` column to analyze.
error_types = ["validation", "test"]

## Import dependencies

In [None]:
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

%matplotlib inline

## Configure plots and analyses

In [None]:
sns.set_style("white")

In [None]:
# Apply every plot default in a single update so the settings read as one table.
mpl.rcParams.update({
    # Display figures at a reasonable default size.
    'figure.figsize': (6, 4),

    # Disable top and right spines.
    'axes.spines.top': False,
    'axes.spines.right': False,

    # Display and save figures at higher resolution for presentations and manuscripts.
    'savefig.dpi': 200,
    'figure.dpi': 120,

    # Display text at sizes large enough for presentations and manuscripts.
    'font.weight': "normal",
    'axes.labelweight': "normal",
    'font.size': 14,
    'axes.labelsize': 14,
    'legend.fontsize': 12,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,

    # Render text with matplotlib's own text engine instead of LaTeX.
    'text.usetex': False,
})

In [None]:
# Composite (multi-predictor) models share one color per palette.
_composite_predictors = [
    'ep_x-ne_star',
    'ep_star-ne_star',
    'lbi-ne_star',
    'ne_star-lbi',
    'cTiter_x-ne_star',
    'cTiter_x-ne_star-lbi',
    'fra_cTiter_x-ne_star',
]

# Predictors grouped by color. The group order fixes the key order of the dictionaries below.
_predictor_color_groups = [
    ('#cccccc', ['naive']),
    ('#000000', ['offspring']),
    ('#999999', ['normalized_fitness']),
    ('#000000', ['fitness']),
    ('#4575b4', ['ep', 'ep_wolf', 'ep_star', 'ep_x', 'ep_x_koel', 'ep_x_wolf', 'oracle_x', 'rb']),
    ('#91bfdb', ['cTiter', 'cTiter_x', 'cTiterSub', 'cTiterSub_star', 'cTiterSub_x', 'fra_cTiter_x']),
    ('#2ca25f', ['ne_star']),
    ('#99d8c9', ['dms_star', 'dms_nonepitope', 'dms_entropy']),
    ('#fc8d59', ['unnormalized_lbi', 'lbi']),
    ('#d73027', ['delta_frequency']),
    ('#ffffff', _composite_predictors),
]

# Color of each predictor in the violin plots.
color_by_predictor = {
    predictor: color
    for color, group in _predictor_color_groups
    for predictor in group
}

# Color of each predictor in the histograms. Identical to the palette above except that
# true fitness is drawn in black and composite models in gray instead of white.
histogram_color_by_predictor = {
    **color_by_predictor,
    'normalized_fitness': '#000000',
    **dict.fromkeys(_composite_predictors, "#999999"),
}

# Human-readable display name for each predictor. Embedded newlines wrap long labels;
# the facet titles of the t-statistic histograms replace them with spaces.
# Note that 'lbi-ne_star' and 'ne_star-lbi' intentionally share the same display name.
name_by_predictor = {
    "naive": "naive",
    "offspring": "observed fitness",
    "normalized_fitness": "true fitness",
    "fitness": "estimated fitness",
    "ep": "epitope mutations",
    "ep_wolf": "Wolf epitope mutations",
    "ep_star": "epitope ancestor",
    "ep_x": "epitope antigenic\nnovelty",
    "ep_x_koel": "Koel epitope antigenic novelty",
    "ep_x_wolf": "Wolf epitope antigenic novelty",
    "oracle_x": "oracle antigenic novelty",
    "rb": "Koel epitope mutations",
    "cTiter": "antigenic advance",
    "cTiter_x": "HI antigenic novelty",
    "cTiterSub": "linear HI mut phenotypes",
    "cTiterSub_star": "ancestral HI mut phenotypes",
    "cTiterSub_x": "HI sub cross-immunity",
    "fra_cTiter_x": "FRA antigenic novelty",
    "ne_star": "mutational load",
    "dms_star": "DMS mutational\neffects",
    "dms_nonepitope": "DMS mutational load",
    "dms_entropy": "DMS entropy",
    "unnormalized_lbi": "unnormalized LBI",
    "lbi": "LBI",
    "delta_frequency": "delta frequency",
    'ep_x-ne_star': "mutational load +\nepitope antigenic\nnovelty",
    'ep_star-ne_star': "mutational load +\nepitope ancestor",
    'lbi-ne_star': "mutational load +\n LBI",
    'ne_star-lbi': "mutational load +\n LBI",
    'cTiter_x-ne_star': "mutational load +\nHI antigenic novelty",
    'cTiter_x-ne_star-lbi': "mutational load +\nHI antigenic novelty +\nLBI",
    'fra_cTiter_x-ne_star': "mutational load +\nFRA antigenic novelty"
}

# Human-readable display name for each sample analyzed in this notebook.
name_by_sample = {
    "simulated_sample_3": "simulated populations",
    "natural_sample_1_with_90_vpm_sliding": "natural populations"
}

In [None]:
# Map each display name to its predictor's color. Predictors that share a display name
# ('lbi-ne_star' and 'ne_star-lbi') collapse into one entry; the later predictor's color wins.
color_by_model = {name_by_predictor[predictor]: color for predictor, color in color_by_predictor.items()}

In [None]:
# Predictors (models) to analyze for each sample. The "naive" entry is the baseline that
# every other predictor of the same sample is compared against in the hypothesis tests.
predictors_by_sample = {
    "simulated_sample_3": [
        "naive",
        "normalized_fitness",
        "ep_x",
        "ne_star",
        "lbi",
        "delta_frequency",
        "ep_star-ne_star",
        "ep_x-ne_star",
        "lbi-ne_star"
    ],
    "natural_sample_1_with_90_vpm_sliding": [
        "naive",
        "ep_x",
        "cTiter_x",
        "ne_star",
        "dms_star",
        "lbi",
        "delta_frequency",
        "ep_star-ne_star",
        "ep_x-ne_star",
        "cTiter_x-ne_star",
        "ne_star-lbi",
        "cTiter_x-ne_star-lbi"
    ]
}

In [None]:
# Load the model distances table. The rest of this notebook reads its "sample",
# "error_type", "predictors", "validation_error", and "null_validation_error" columns.
df = pd.read_table(model_distances)

## Bootstrap hypothesis tests

Perform [bootstrap hypothesis tests](https://en.wikipedia.org/wiki/Bootstrapping_(statistics)#Bootstrap_hypothesis_testing) (Efron and Tibshirani 1993) between biologically-informed models and the naive model for each dataset.
The following logic is copied from the article linked above to support the logic of the functions defined below.

Calculate test statistic _t_:

$$
t = \frac{\bar{x}-\bar{y}}{\sqrt{\sigma_x^2/n + \sigma_y^2/m}}
$$

Create two new data sets whose values are $x_i^{'} = x_i - \bar{x} + \bar{z}$ and $y_i^{'} = y_i - \bar{y} + \bar{z}$, where $\bar{z}$ is the mean of the combined sample.

Draw a random sample ($x_i^*$) of size $n$ with replacement from $x_i^{'}$ and another random sample ($y_i^*$) of size $m$ with replacement from $y_i^{'}$.

Calculate the test statistic $t^* = \frac{\bar{x^*}-\bar{y^*}}{\sqrt{\sigma_x^{*2}/n + \sigma_y^{*2}/m}}$

Repeat the previous two steps (drawing the bootstrap samples and calculating $t^*$) $B$ times (e.g. $B=1000$) to collect $B$ values of the test statistic.

Estimate the p-value as $p = \frac{\sum_{i=1}^B I\{t_i^* \geq t\}}{B}$ where $I\{\text{condition}\} = 1$ when *condition* is true and 0 otherwise.

In [None]:
def get_model_distances_by_build(df, sample, error_type, predictors):
    """Return the distances to the future for one sample, error type, and model.

    Rows are selected with boolean masks instead of an interpolated query string,
    so values that contain quote characters are matched literally instead of
    producing an invalid query expression.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with "sample", "error_type", "predictors", and "validation_error" columns.
    sample : str
        Value of the "sample" column to select.
    error_type : str
        Value of the "error_type" column to select.
    predictors : str
        Value of the "predictors" column to select.

    Returns
    -------
    numpy.ndarray
        The "validation_error" values of the matching rows, in the row order of `df`.
        The array is empty when no row matches.
    """
    matches = (
        (df["sample"] == sample) &
        (df["error_type"] == error_type) &
        (df["predictors"] == predictors)
    )
    return df.loc[matches, "validation_error"].values

In [None]:
def calculate_t_statistic(x_dist, y_dist):
    """Return the two-sample t statistic for the difference in means of two sets of values.

    The statistic is the difference between the means of `x_dist` and `y_dist` divided by
    the square root of the sum of each input's variance over its length. Variances come
    from `np.var` with its default settings.
    """
    def variance_over_length(dist):
        # Contribution of one input to the squared denominator of the statistic.
        return np.var(dist) / dist.shape[0]

    mean_difference = x_dist.mean() - y_dist.mean()
    denominator = np.sqrt(variance_over_length(x_dist) + variance_over_length(y_dist))

    return mean_difference / denominator

In [None]:
def bootstrap_t(x_dist_adjusted, y_dist_adjusted):
    """Draw one bootstrap sample from each recentered distribution and return its t statistic.

    Both inputs are expected to have been recentered on the mean of the union of their
    original distributions. Each is resampled with replacement to its own length.
    """
    # Resample x before y so random draws are consumed in a fixed order.
    resampled_x, resampled_y = (
        np.random.choice(dist, size=dist.shape[0], replace=True)
        for dist in (x_dist_adjusted, y_dist_adjusted)
    )

    return calculate_t_statistic(resampled_x, resampled_y)

In [None]:
def compare_distributions_by_bootstrap(x_dist, y_dist, n_bootstraps):
    """Test whether two distributions differ in their means with a bootstrap hypothesis test.

    Returns a tuple of the p-value, the t statistic observed for the inputs, and the array
    of `n_bootstraps` bootstrapped t statistics. The p-value is the fraction of bootstrapped
    t statistics that are greater than or equal to the observed one.
    """
    # Test statistic for the data as observed.
    observed_t = calculate_t_statistic(x_dist, y_dist)

    # Shift both distributions onto the mean of their union so they share a common mean.
    joint_mean = np.concatenate([x_dist, y_dist]).mean()
    x_recentered = x_dist - x_dist.mean() + joint_mean
    y_recentered = y_dist - y_dist.mean() + joint_mean

    # Collect the t statistics of repeated bootstrap samples from the recentered data.
    null_t_values = np.array([
        bootstrap_t(x_recentered, y_recentered)
        for _ in range(n_bootstraps)
    ])

    n_at_least_observed = (null_t_values >= observed_t).sum()

    return (n_at_least_observed / n_bootstraps, observed_t, null_t_values)

In [None]:
# Distances to the future for the true fitness model and the naive model of the
# simulated sample during the validation period.
example_model_dist, example_naive_dist = (
    get_model_distances_by_build(df, "simulated_sample_3", "validation", example_predictor)
    for example_predictor in ("normalized_fitness", "naive")
)

# Element-wise differences between the two models, and the same differences shifted
# to have a mean of zero.
example_model_difference = example_model_dist - example_naive_dist
example_null_difference = example_model_difference - example_model_difference.mean()

In [None]:
example_model_dist

In [None]:
example_naive_dist

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(6, 4))

# Use the same 0.5-wide bins for both histograms, spanning the range of both distributions.
example_differences = (example_model_difference, example_null_difference)
bins = np.arange(
    min(difference.min() for difference in example_differences),
    max(difference.max() for difference in example_differences),
    0.5
)

# Overlay the observed and null distributions.
for example_difference, example_label in zip(example_differences, ("true fitness", "null model")):
    ax.hist(example_difference, bins=bins, label=example_label, alpha=0.5)

# Mark the mean of each distribution.
for example_difference, example_label, example_color in zip(
    example_differences,
    ("model mean", "null model mean"),
    ("blue", "orange")
):
    ax.axvline(x=example_difference.mean(), label=example_label, color=example_color)

ax.set(
    xlim=(-6, 6),
    xlabel="Model - naive distance to future (AAs)",
    ylabel="Number of timepoints"
)
ax.set_title(
    "Example model and null distributions\nfor differences between distances to the future",
    fontsize=12
)

ax.legend(frameon=False)

In [None]:
# Test every model against the naive model of the same sample and error type.
# Collect one record of test results per comparison and one data frame of
# bootstrapped t statistics per comparison.
p_values = []
bootstrapped_t_distributions = []

for sample, predictors in predictors_by_sample.items():
    sample_df = df.query(f"sample == '{sample}'")

    for error_type in error_types:
        error_type_df = sample_df.query(f"error_type == '{error_type}'")
        naive_dist = error_type_df.query("predictors == 'naive'")["validation_error"].values

        # The naive model is the baseline, so it is never compared with itself.
        for predictor in (name for name in predictors if name != "naive"):
            predictor_dist = error_type_df.query(f"predictors == '{predictor}'")["validation_error"].values

            # Pair each model distance with the naive distance at the same position so that
            # variation shared by all models at a timepoint cancels out.
            # NOTE(review): this assumes both arrays have the same length and timepoint order;
            # nothing here checks it -- confirm against the input table.
            difference_dist = predictor_dist - naive_dist

            # The null distribution keeps the variance of the observed differences but has a
            # mean of zero, which lets us test whether the observed mean differs from zero.
            null_difference_dist = difference_dist - difference_dist.mean()

            # Perform the bootstrap hypothesis test between the differences distributions.
            p_value, t, bootstrapped_t_dist = compare_distributions_by_bootstrap(
                null_difference_dist,
                difference_dist,
                n_bootstraps
            )

            # Metadata shared by the summary record and the per-bootstrap data frame.
            comparison = {
                "sample": sample,
                "error_type": error_type,
                "predictors": predictor
            }
            p_values.append({**comparison, "t": t, "p_value": p_value})
            bootstrapped_t_distributions.append(
                pd.DataFrame({
                    **comparison,
                    "empirical_t": t,
                    "p_value": p_value,
                    "bootstrap_t": bootstrapped_t_dist
                })
            )

In [None]:
bootstrapped_t_distributions_df = pd.concat(bootstrapped_t_distributions)

In [None]:
bootstrapped_t_distributions_df.head()

In [None]:
bootstrapped_t_distributions_df.shape

In [None]:
# Number of empirical t statistics, one per (sample, error type, predictor) comparison.
# Computed from the data frame defined above so this cell runs on a fresh kernel.
bootstrapped_t_distributions_df.groupby(["sample", "error_type", "predictors"])["empirical_t"].first().shape

In [None]:
# `p_values` is a list of records at this point, so report its length rather than `.shape`.
len(p_values)

In [None]:
# p-value of each (sample, error type, predictor) comparison, read from the data frame
# defined above so this cell runs on a fresh kernel.
bootstrapped_t_distributions_df.groupby(["sample", "error_type", "predictors"])["p_value"].first().values

In [None]:
# Empirical t statistic of each (sample, error type, predictor) comparison, read from the
# data frame defined above so this cell runs on a fresh kernel.
bootstrapped_t_distributions_df.groupby(["sample", "error_type", "predictors"])["empirical_t"].first()

In [None]:
name_by_sample

In [None]:
# Plot the bootstrapped t distributions of one sample and error type, one facet per model.
sample = "natural_sample_1_with_90_vpm_sliding"
#sample = "simulated_sample_3"
#error_type = "validation"
error_type = "test"
example_df = bootstrapped_t_distributions_df.query(f"(sample == '{sample}') & (error_type == '{error_type}')")
example_df = example_df.sort_values("empirical_t", ascending=False).copy()
grouped_df = example_df.groupby("predictors", sort=False)

predictors = grouped_df["predictors"].first().values
empirical_t_values = grouped_df["empirical_t"].first().values

# Keep these p-values under their own name. The list `p_values` built by the hypothesis
# tests is converted to a data frame in a later cell and must not be overwritten here.
example_p_values = grouped_df["p_value"].first().values

# Lay the facets out in two columns.
n_rows = int(np.ceil(example_p_values.shape[0] / 2.0))
n_cells = 2 * n_rows

fig, all_axes = plt.subplots(
    n_rows,
    2,
    figsize=(8, n_rows),
    sharex=True,
    sharey=True
)
axes = all_axes.flatten()
bins = np.arange(-5, 5, 0.25)

for i, predictor in enumerate(predictors):
    ax = axes[i]

    # A p-value of zero only means that no bootstrap sample reached the observed t,
    # so report it as below the resolution of the bootstrap.
    if example_p_values[i] < 1.0 / n_bootstraps:
        p_value = f"p < {1.0 / n_bootstraps}"
    else:
        p_value = f"p = {example_p_values[i]}"

    ax.hist(
        example_df.query(f"predictors == '{predictor}'")["bootstrap_t"].values,
        bins=bins,
        color=histogram_color_by_predictor[predictor]
    )
    # Mark the t statistic observed for the original data.
    ax.axvline(
        empirical_t_values[i],
        color="orange"
    )
    ax.text(
        0.01,
        0.9,
        f"$t$ = {empirical_t_values[i]:.2f}, {p_value}",
        horizontalalignment="left",
        verticalalignment="center",
        transform=ax.transAxes,
        fontsize=10
    )

    ax.set_title(
        name_by_predictor[predictor].replace("\n", " "),
        fontsize=10
    )

    # Only label the x-axis of facets in the bottom row.
    if i >= n_cells - 2:
        ax.set_xlabel("$t$ statistic")

# Shared y-axis label for all facets.
fig.text(
    0.0,
    0.5,
    "bootstrap samples",
    rotation="vertical",
    horizontalalignment="center",
    verticalalignment="center"
)

# Figure-level title naming the sample and error type.
fig.text(
    0.5,
    0.99,
    f"{name_by_sample[sample]}, {error_type} period",
    horizontalalignment="center",
    verticalalignment="center",
    fontsize=12
)

fig.tight_layout(pad=0.75, w_pad=0.5, h_pad=0.5)

In [None]:
p_value_df = pd.DataFrame(p_values)

In [None]:
p_value_df

Identify models whose mean distances are significantly closer to future populations than the naive model ($\alpha=0.05$).

In [None]:
p_value_df[p_value_df["p_value"] < 0.05]

In [None]:
p_value_df.to_csv(output_table, sep="\t", index=False)

## Compare distributions of composite and individual models

Perform bootstrap hypothesis tests between composite models and their respective individual models to determine whether any composite models are significantly more accurate. We perform these tests for both simulated and natural populations.

In [None]:
# Composite models to test for each sample. Each entry names one composite model and
# the individual models it is compared against, one hypothesis test per individual model.
composite_models = {
    "simulated_sample_3": [
        {
            "individual": ["ne_star", "lbi"],
            "composite": "lbi-ne_star"
        },
        {
            "individual": ["ep_x", "ne_star"],
            "composite": "ep_x-ne_star"
        },
        {
            "individual": ["ep_star", "ne_star"],
            "composite": "ep_star-ne_star"
        }
    ],
    "natural_sample_1_with_90_vpm_sliding": [
        {
            "individual": ["cTiter_x", "ne_star"],
            "composite": "cTiter_x-ne_star"
        },
        {
            "individual": ["ne_star", "lbi"],
            "composite": "ne_star-lbi"
        },
        {
            "individual": ["ep_x", "ne_star"],
            "composite": "ep_x-ne_star"
        },
        {
            "individual": ["ep_star", "ne_star"],
            "composite": "ep_star-ne_star"
        }
    ]
}

In [None]:
# Test every composite model against each of its individual models for all
# error types and samples, collecting one record of test results per comparison.
composite_vs_individual_p_values = []

for error_type in error_types:
    for sample, pairings in composite_models.items():
        for pairing in pairings:
            composite_name = pairing["composite"]
            composite_dist = get_model_distances_by_build(df, sample, error_type, composite_name)

            for individual_model in pairing["individual"]:
                individual_dist = get_model_distances_by_build(df, sample, error_type, individual_model)

                # Pair each composite distance with the individual model's distance at the
                # same position so that variation shared by all models at a timepoint cancels out.
                difference_dist = composite_dist - individual_dist

                # The null distribution keeps the variance of the observed differences but has
                # a mean of zero, which lets us test whether the observed mean differs from zero.
                null_difference_dist = difference_dist - difference_dist.mean()

                # The bootstrapped t statistics themselves are not needed for this summary.
                p_value, t, _ = compare_distributions_by_bootstrap(
                    null_difference_dist,
                    difference_dist,
                    n_bootstraps
                )

                result = {
                    "sample": sample,
                    "error_type": error_type,
                    "individual_model": individual_model,
                    "composite_model": composite_name,
                    "t": t,
                    "p_value": p_value
                }
                composite_vs_individual_p_values.append(result)

In [None]:
composite_vs_individual_p_values_df = pd.DataFrame(composite_vs_individual_p_values)

In [None]:
composite_vs_individual_p_values_df.query("p_value < 0.05")

## Calculate bootstraps for all models and samples

In [None]:
# Difference between each model's distance to the future and the value stored in
# "null_validation_error" for the same row.
# NOTE(review): presumably "null_validation_error" is the naive model's distance at the
# same timepoint -- confirm against the code that writes the input table.
df["error_difference"] = df["validation_error"] - df["null_validation_error"]

In [None]:
bootstrap_distances = []
for (sample, error_type, predictors), group_df in df.groupby(["sample", "error_type", "predictors"]):
    # Skip samples and predictors that are not part of this analysis.
    if predictors not in predictors_by_sample.get(sample, ()):
        continue

    print(f"Processing: {sample}, {error_type}, {predictors}")

    # Resample this group's error differences with replacement and record the mean of
    # each bootstrap sample.
    error_differences = group_df["error_difference"]
    bootstrap_distribution = [
        error_differences.sample(frac=1.0, replace=True).mean()
        for _ in range(n_bootstraps)
    ]

    group_bootstraps = pd.DataFrame({
        "sample": sample,
        "error_type": error_type,
        "predictors": predictors,
        "bootstrap_distance": bootstrap_distribution
    })
    bootstrap_distances.append(group_bootstraps)

In [None]:
bootstraps_df = pd.concat(bootstrap_distances)

In [None]:
bootstraps_df["model"] = bootstraps_df["predictors"].map(name_by_predictor)

In [None]:
bootstraps_df.head()

In [None]:
def _select_bootstraps(bootstraps_df, predictors, error_type):
    """Return a copy of the bootstrap rows for one error type, limited to the given predictors."""
    selected_df = bootstraps_df.query(f"error_type == '{error_type}'")
    return selected_df[selected_df["predictors"].isin(predictors)].copy()


def _plot_bootstrap_facet(ax, facet_df, error_type, sample_name, models_order, predictors_order, facet_title):
    """Draw one facet of violin plots with a p-value annotation above each non-naive model."""
    median_naive_distance = facet_df.query("predictors == 'naive'")["bootstrap_distance"].median()

    ax = sns.violinplot(
        x="model",
        y="bootstrap_distance",
        data=facet_df,
        order=models_order,
        ax=ax,
        palette=color_by_model,
        cut=0
    )

    # Leave headroom above the largest bootstrap value for the p-value annotations.
    max_distance = facet_df["bootstrap_distance"].max() + 0.3
    ax.set_ylim(top=max_distance + 0.6)

    for index, predictor in enumerate(predictors_order):
        # The naive model is the baseline, so it has no p-value of its own.
        if predictor == "naive":
            continue

        p_value = p_value_df.query(
            f"(sample == '{sample_name}') & (error_type == '{error_type}') & (predictors == '{predictor}')"
        )["p_value"].values[0]

        # A p-value of zero only means that no bootstrap sample reached the observed t,
        # so report it as below the resolution of the bootstrap.
        if p_value < (1.0 / n_bootstraps):
            p_value_string = f"p < {1.0 / n_bootstraps}"
        else:
            p_value_string = f"p = {p_value:.4f}"

        ax.text(
            index,
            max_distance,
            p_value_string,
            fontsize=12,
            horizontalalignment="center",
            verticalalignment="center"
        )

    # Reference line at the median of the naive model's bootstrap values.
    ax.axhline(y=median_naive_distance, label="naive", color="#999999", zorder=-10)
    ax.title.set_text(facet_title)

    ax.set_xlabel("Model")
    ax.set_ylabel("Bootstrapped model - naive\ndistance to future (AAs)")

    return ax


def plot_bootstrap_distances(bootstraps_df, predictors, title, width=16, height=8):
    """Plot bootstrap distributions for the validation and test periods of one sample.

    The facet titles are built from `title` rather than from a module-level `sample`
    variable, so the figure depends only on the arguments and on the module-level
    `p_value_df`, `n_bootstraps`, and `color_by_model`.

    Parameters
    ----------
    bootstraps_df : pandas.DataFrame
        Bootstrap values with "sample", "error_type", "predictors", "model", and
        "bootstrap_distance" columns. The sample name is taken from the first value
        of the "sample" column.
    predictors : list of str
        Predictors to include in the figure.
    title : str
        Name of the sample as it should appear in the facet titles.
    width, height : numeric
        Figure size passed to matplotlib.

    Returns
    -------
    tuple
        The figure and the array of its two axes (validation first, then test).

    Raises
    ------
    IndexError
        If `p_value_df` has no row for a plotted non-naive predictor.
    """
    fig, axes = plt.subplots(2, 1, figsize=(width, height), gridspec_kw={"hspace": 0.5})

    sample_name = bootstraps_df["sample"].drop_duplicates().values[0]
    validation_df = _select_bootstraps(bootstraps_df, predictors, "validation")

    # Use this order for both validation and test facets as in Tables 1 and 2.
    # Models are ordered by their mean bootstrap value in the validation period.
    models_order = validation_df.groupby("model")["bootstrap_distance"].mean().sort_values().reset_index()["model"].values
    predictors_order = validation_df.groupby("predictors")["bootstrap_distance"].mean().sort_values().reset_index()["predictors"].values

    _plot_bootstrap_facet(
        axes[0],
        validation_df,
        "validation",
        sample_name,
        models_order,
        predictors_order,
        f"Validation of {title}"
    )
    _plot_bootstrap_facet(
        axes[1],
        _select_bootstraps(bootstraps_df, predictors, "test"),
        "test",
        sample_name,
        models_order,
        predictors_order,
        f"Test of {title}"
    )

    sns.despine()

    return fig, axes

In [None]:
# Plot and save the bootstrap distributions for simulated populations.
sample = "simulated_sample_3"
fig, axes = plot_bootstrap_distances(
    bootstraps_df[bootstraps_df["sample"] == sample],
    predictors_by_sample[sample],
    name_by_sample[sample],
    width=16
)

fig.tight_layout()
fig.savefig(bootstrap_figure_for_simulated_sample, bbox_inches="tight")

In [None]:
# Plot and save the bootstrap distributions for natural populations. This sample has
# more models than the simulated one, so the figure is wider.
sample = "natural_sample_1_with_90_vpm_sliding"
fig, axes = plot_bootstrap_distances(
    bootstraps_df[bootstraps_df["sample"] == sample],
    predictors_by_sample[sample],
    name_by_sample[sample],
    width=24
)

fig.tight_layout()
fig.savefig(bootstrap_figure_for_natural_sample, bbox_inches="tight")