# Bootstrap distances to the future

Estimate the uncertainty of distance-to-the-future values for each sample and model by bootstrapping the differences between the distances observed at each timepoint for biologically-informed models and the naive model.

## Define inputs, outputs, and parameters

In [None]:
# Define inputs.
# Path to the table of model distances; loaded later with pandas.
model_distances = snakemake.input.model_distances

# Define outputs.
# Table of bootstrap p values per sample, error type, and model.
output_table = snakemake.output.output_table
# Figures of bootstrap distributions, one per sample.
bootstrap_figure_for_simulated_sample = snakemake.output.bootstrap_figure_for_simulated_sample
bootstrap_figure_for_natural_sample = snakemake.output.bootstrap_figure_for_natural_sample
# LaTeX table comparing composite models to their individual components.
composite_vs_individual_model_table = snakemake.output.composite_vs_individual_model_table

# Define parameters.
# Number of bootstrap replicates drawn for every distribution in this notebook.
n_bootstraps = snakemake.params.n_bootstraps

# Values of the "error_type" column that are analyzed.
error_types = ["validation", "test"]

## Import dependencies

In [None]:
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
import pandas as pd
import seaborn as sns

%matplotlib inline

## Configure plots and analyses

In [None]:
# Use seaborn's "white" style for all figures in this notebook.
sns.set_style("white")

In [None]:
# Apply all matplotlib defaults for this notebook in a single update.
mpl.rcParams.update({
    # Reasonable default figure size.
    'figure.figsize': (6, 4),
    # No top or right spines.
    'axes.spines.top': False,
    'axes.spines.right': False,
    # Higher resolution for on-screen display and for saved figures.
    'savefig.dpi': 200,
    'figure.dpi': 120,
    # Text sized for presentations and manuscripts.
    'font.weight': "normal",
    'axes.labelweight': "normal",
    'font.size': 14,
    'axes.labelsize': 14,
    'legend.fontsize': 12,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
})

# Render text with matplotlib's own engine rather than LaTeX.
mpl.rc('text', usetex=False)

In [None]:
# Hex color used to draw each predictor (model), keyed by predictor identifier.
# Composite predictors (identifiers joined with "-") are all drawn in white.
color_by_predictor = {
    'naive': '#cccccc',
    'offspring': '#000000',
    'normalized_fitness': '#999999',
    'fitness': '#000000',
    'ep': '#4575b4',
    'ep_wolf': '#4575b4',
    'ep_star': '#4575b4',
    'ep_x': '#4575b4',
    'ep_x_koel': '#4575b4',
    'ep_x_wolf': '#4575b4',
    'oracle_x': '#4575b4',
    'rb': '#4575b4',
    'cTiter': '#91bfdb',
    'cTiter_x': '#91bfdb',
    'cTiterSub': '#91bfdb',
    'cTiterSub_star': '#91bfdb',
    'cTiterSub_x': '#91bfdb',
    'fra_cTiter_x': '#91bfdb',
    'ne_star': '#2ca25f',
    'dms_star': '#99d8c9',
    "dms_nonepitope": "#99d8c9",
    "dms_entropy": "#99d8c9",
    'unnormalized_lbi': '#fc8d59',
    'lbi': '#fc8d59',
    'delta_frequency': '#d73027',
    'ep_x-ne_star': "#ffffff",
    'ep_star-ne_star': "#ffffff",
    'lbi-ne_star': "#ffffff",
    'ne_star-lbi': "#ffffff",
    'cTiter_x-ne_star': "#ffffff",
    'cTiter_x-ne_star-lbi': "#ffffff",
    'fra_cTiter_x-ne_star': "#ffffff"
}

# Human-readable display name for each predictor identifier. Embedded newlines
# wrap long names when they are used as axis tick labels.
# Note that "lbi-ne_star" and "ne_star-lbi" share the same display name.
name_by_predictor = {
    "naive": "naive",
    "offspring": "observed fitness",
    "normalized_fitness": "true fitness",
    "fitness": "estimated fitness",
    "ep": "epitope mutations",
    "ep_wolf": "Wolf epitope mutations",
    "ep_star": "epitope ancestor",
    "ep_x": "epitope antigenic\nnovelty",
    "ep_x_koel": "Koel epitope antigenic novelty",
    "ep_x_wolf": "Wolf epitope antigenic novelty",
    "oracle_x": "oracle antigenic novelty",
    "rb": "Koel epitope mutations",
    "cTiter": "antigenic advance",
    "cTiter_x": "HI antigenic novelty",
    "cTiterSub": "linear HI mut phenotypes",
    "cTiterSub_star": "ancestral HI mut phenotypes",
    "cTiterSub_x": "HI sub cross-immunity",
    "fra_cTiter_x": "FRA antigenic novelty",
    "ne_star": "mutational load",
    "dms_star": "DMS mutational\neffects",
    "dms_nonepitope": "DMS mutational load",
    "dms_entropy": "DMS entropy",
    "unnormalized_lbi": "unnormalized LBI",
    "lbi": "LBI",
    "delta_frequency": "delta frequency",
    'ep_x-ne_star': "mutational load +\nepitope antigenic\nnovelty",
    'ep_star-ne_star': "mutational load +\nepitope ancestor",
    'lbi-ne_star': "mutational load +\n LBI",
    'ne_star-lbi': "mutational load +\n LBI",
    'cTiter_x-ne_star': "mutational load +\nHI antigenic novelty",
    'cTiter_x-ne_star-lbi': "mutational load +\nHI antigenic novelty +\nLBI",
    'fra_cTiter_x-ne_star': "mutational load +\nFRA antigenic novelty"
}

# Human-readable display name for each sample identifier analyzed below.
name_by_sample = {
    "simulated_sample_3": "simulated populations",
    "natural_sample_1_with_90_vpm_sliding": "natural populations"
}

In [None]:
# Re-key the predictor colors by display name so they can be used as a palette
# for plots whose categories are the "model" display names.
# Raises KeyError if a predictor has a color but no display name.
color_by_model = {name_by_predictor[predictor]: color for predictor, color in color_by_predictor.items()}

In [None]:
# Predictors (models) to bootstrap and plot for each sample. Any sample or
# predictor in the input table that is not listed here is skipped.
predictors_by_sample = {
    "simulated_sample_3": [
        "normalized_fitness",
        "ep_star",
        "ep_x",
        "ne_star",
        "lbi",
        "delta_frequency",
        "ep_star-ne_star",
        "ep_x-ne_star",
        "lbi-ne_star"
    ],
    "natural_sample_1_with_90_vpm_sliding": [
        "ep_x",
        "cTiter_x",
        "ne_star",
        "dms_star",
        "lbi",
        "delta_frequency",
        "ep_star-ne_star",
        "ep_x-ne_star",
        "cTiter_x-ne_star",
        "ne_star-lbi",
        "cTiter_x-ne_star-lbi"
    ]
}

In [None]:
# Load the model distances table. The cells below read its "sample",
# "error_type", "predictors", "validation_error", and "null_validation_error"
# columns.
df = pd.read_table(model_distances)

In [None]:
def get_model_distances_by_build(df, sample, error_type, predictors):
    """Return the distances for one sample, error type, and model.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with "sample", "error_type", "predictors", and
        "validation_error" columns.
    sample : str
        Value of the "sample" column to select.
    error_type : str
        Value of the "error_type" column to select.
    predictors : str
        Value of the "predictors" column to select.

    Returns
    -------
    numpy.ndarray
        Values of the "validation_error" column for the matching rows, in the
        row order of ``df``. The array is empty when nothing matches. The same
        column is returned for every ``error_type``.
    """
    # Compare with boolean masks instead of interpolating the values into a
    # query string, so values that contain quotes cannot break the expression.
    matches = (
        (df["sample"] == sample)
        & (df["error_type"] == error_type)
        & (df["predictors"] == predictors)
    )
    return df.loc[matches, "validation_error"].values

## Calculate bootstraps for all models and samples

Build bootstrap distributions for empirical differences between biologically-informed and naive models at each timepoint. Values that are less than zero occur when a given model estimates a population closer to the future than the naive model.

In [None]:
# Difference between each model's error and the naive (null) model's error in
# the same row; negative values mean the model did better than naive.
df["error_difference"] = df["validation_error"].sub(df["null_validation_error"])

In [None]:
# Display the empirical mean and standard deviation of the error difference
# for every sample, error type, and model.
(
    df.groupby(["sample", "error_type", "predictors"])["error_difference"]
    .agg(["mean", "std"])
    .reset_index()
)

In [None]:
# Collect one data frame of bootstrap replicates per sample, error type, and model.
bootstrap_distances = []
for (sample, error_type, predictors), group_df in df.groupby(["sample", "error_type", "predictors"]):
    # Skip samples and models that were not selected for this analysis.
    if predictors not in predictors_by_sample.get(sample, []):
        continue

    print(f"Processing: {sample}, {error_type}, {predictors}")

    # Resample the model-minus-naive error differences with replacement and
    # record the mean of each resampled set.
    observed_differences = group_df["error_difference"]
    bootstrap_distribution = [
        observed_differences.sample(frac=1.0, replace=True).mean()
        for _ in range(n_bootstraps)
    ]

    replicate_df = pd.DataFrame({
        "sample": sample,
        "error_type": error_type,
        "predictors": predictors,
        "bootstrap_distance": bootstrap_distribution
    })
    bootstrap_distances.append(replicate_df)

In [None]:
# Stack the per-model bootstrap replicates into one long data frame.
# The original per-model row indices are kept, so index values repeat.
bootstraps_df = pd.concat(bootstrap_distances)

In [None]:
# Annotate each replicate with the display name of its model; predictors
# without an entry in name_by_predictor become missing values.
bootstraps_df["model"] = bootstraps_df["predictors"].map(name_by_predictor)

In [None]:
# Preview the first rows of the bootstrap table.
bootstraps_df.head()

In [None]:
# Display the mean and standard deviation of the bootstrap replicates for
# every sample, error type, and model.
(
    bootstraps_df
    .groupby(["sample", "error_type", "predictors"])["bootstrap_distance"]
    .agg(["mean", "std"])
)

## Calculate p values from bootstraps

Estimate the significance of the difference between each model's distance to the future and that of the corresponding naive model by calculating the proportion of bootstrap replicates with values greater than or equal to zero, i.e. replicates in which the model is no closer to the future than the naive model. The null hypothesis here is that there is no difference between biologically-informed and naive models at each timepoint.

In [None]:
# Group the bootstrap replicates by sample, error type, and model.
grouped_bootstraps_df = bootstraps_df.groupby(["sample", "error_type", "predictors"])

In [None]:
# The p value of each group is the fraction of its bootstrap replicates in
# which the model was no better than the naive model (difference >= 0).
p_value_df = (
    grouped_bootstraps_df
    .apply(lambda replicates: (replicates["bootstrap_distance"] >= 0).sum() / float(n_bootstraps))
    .reset_index()
    .rename(columns={0: "p_value"})
)

In [None]:
# Display the models whose p value is below the 0.05 threshold.
p_value_df[p_value_df["p_value"] < 0.05]

In [None]:
# Save the p values as a tab-delimited table without the row index.
p_value_df.to_csv(output_table, sep="\t", index=False)

## Plot bootstrap distributions used to calculate p values

In [None]:
def _plot_bootstrap_facet(ax, facet_df, sample_name, error_type, models_order,
                          predictors_order, facet_title, baseline_label):
    """Draw one violin-plot facet of bootstrap differences and annotate each model.

    Reads the notebook-level ``p_value_df``, ``n_bootstraps``, and
    ``color_by_model``.

    Parameters
    ----------
    ax : matplotlib.axes.Axes
        Axes to draw on.
    facet_df : pandas.DataFrame
        Bootstrap replicates for a single sample and error type.
    sample_name : str
        Sample identifier used to look up p values in ``p_value_df``.
    error_type : str
        Error type used to look up p values in ``p_value_df``.
    models_order : sequence of str
        Display names of models in plotting order (x-axis categories).
    predictors_order : sequence of str
        Predictor identifiers in the order used to position the annotations.
    facet_title : str
        Title of the facet.
    baseline_label : str
        Label of the horizontal line drawn at zero.

    Returns
    -------
    matplotlib.axes.Axes
        The axes returned by seaborn.

    Raises
    ------
    IndexError
        If ``p_value_df`` has no row for an annotated predictor.
    """
    ax = sns.violinplot(
        x="model",
        y="bootstrap_distance",
        data=facet_df,
        order=models_order,
        ax=ax,
        palette=color_by_model,
        cut=0
    )

    # Leave headroom above the largest replicate for the text annotations.
    max_distance = facet_df["bootstrap_distance"].max() + 0.1
    ax.set_ylim(top=max_distance + 0.75)

    for index, predictor in enumerate(predictors_order):
        if predictor == "naive":
            continue

        p_value = p_value_df.query(
            f"(sample == '{sample_name}') & (error_type == '{error_type}') & (predictors == '{predictor}')"
        )["p_value"].values[0]

        # Report p values below the resolution of the bootstrap as an upper bound.
        if p_value < (1.0 / n_bootstraps):
            p_value_string = f"p < {1.0 / n_bootstraps}"
        else:
            p_value_string = f"p = {p_value:.4f}"

        differences = facet_df.query(f"(predictors == '{predictor}')")["bootstrap_distance"]
        mean_difference = differences.mean()
        std_difference = differences.std()

        effect_description = f"{mean_difference:.2f} +/- {std_difference:.2f} AAs\n{p_value_string}"

        ax.text(
            index,
            max_distance,
            effect_description,
            fontsize=12,
            horizontalalignment="center",
            verticalalignment="bottom"
        )

    # A difference of zero means the model performed exactly like the naive model.
    ax.axhline(y=0.0, label=baseline_label, color="#999999", zorder=-10)
    ax.title.set_text(facet_title)

    ax.set_xlabel("Model")
    ax.set_ylabel("Bootstrapped model - naive\ndistance to future (AAs)")

    return ax


def plot_bootstrap_distances(bootstraps_df, predictors, title, width=16, height=8):
    """Plot bootstrap distributions of model-minus-naive distances for one sample.

    The validation results are drawn in the top facet and the test results in
    the bottom facet. Both facets order models by their mean bootstrap value in
    the validation data. Each model is annotated with the mean and standard
    deviation of its replicates and its p value from the notebook-level
    ``p_value_df``.

    Parameters
    ----------
    bootstraps_df : pandas.DataFrame
        Bootstrap replicates with "sample", "error_type", "predictors", "model",
        and "bootstrap_distance" columns. The first value of "sample" is used
        to look up p values, so the frame should contain a single sample.
    predictors : list of str
        Predictor identifiers to include in the plot.
    title : str
        Name of the sample, used in the facet titles
        ("Validation of <title>" and "Test of <title>").
    width, height : float
        Size of the figure in inches.

    Returns
    -------
    tuple
        The matplotlib figure and its array of two axes.
    """
    fig, axes = plt.subplots(2, 1, figsize=(width, height), sharey=True)

    sample_name = bootstraps_df["sample"].drop_duplicates().values[0]
    selected_df = bootstraps_df[bootstraps_df["predictors"].isin(predictors)]
    validation_df = selected_df.query("error_type == 'validation'").copy()
    test_df = selected_df.query("error_type == 'test'").copy()

    # Use this order for both validation and test facets as in Tables 1 and 2.
    # NOTE(review): annotations are positioned by predictor order while violins
    # are positioned by model order; these agree only when each selected
    # predictor has a distinct display name.
    models_order = validation_df.groupby("model")["bootstrap_distance"].mean().sort_values().reset_index()["model"].values
    predictors_order = validation_df.groupby("predictors")["bootstrap_distance"].mean().sort_values().reset_index()["predictors"].values

    # Title the facets from the ``title`` argument rather than from a
    # notebook-level variable, so the function does not depend on global state.
    _plot_bootstrap_facet(
        axes[0],
        validation_df,
        sample_name,
        "validation",
        models_order,
        predictors_order,
        f"Validation of {title}",
        "naive"
    )
    _plot_bootstrap_facet(
        axes[1],
        test_df,
        sample_name,
        "test",
        models_order,
        predictors_order,
        f"Test of {title}",
        "no difference from naive"
    )

    sns.despine()

    fig.tight_layout(pad=0.75, w_pad=1.0, h_pad=1.0)

    return fig, axes

In [None]:
# Plot and save the bootstrap distributions for the simulated populations.
sample = "simulated_sample_3"
simulated_bootstraps_df = bootstraps_df[bootstraps_df["sample"] == sample]
fig, axes = plot_bootstrap_distances(
    simulated_bootstraps_df,
    predictors_by_sample[sample],
    name_by_sample[sample],
    width=16
)

plt.savefig(bootstrap_figure_for_simulated_sample, bbox_inches="tight")

In [None]:
# Plot and save the bootstrap distributions for the natural populations,
# using a larger figure than the default.
sample = "natural_sample_1_with_90_vpm_sliding"
natural_bootstraps_df = bootstraps_df[bootstraps_df["sample"] == sample]
fig, axes = plot_bootstrap_distances(
    natural_bootstraps_df,
    predictors_by_sample[sample],
    name_by_sample[sample],
    width=20,
    height=10
)

plt.savefig(bootstrap_figure_for_natural_sample, bbox_inches="tight")

## Compare distributions of composite and individual models

Perform bootstrap tests between composite models and their respective individual models to determine whether any composite models are significantly more accurate.

In [None]:
# Comparisons to run for each sample: every "composite" model is tested
# against each of the models listed under "individual".
composite_models = {
    "simulated_sample_3": [
        {
            "individual": ["ne_star", "lbi", "normalized_fitness"],
            "composite": "lbi-ne_star"
        }
    ],
    "natural_sample_1_with_90_vpm_sliding": [
        {
            "individual": ["cTiter_x", "ne_star"],
            "composite": "cTiter_x-ne_star"
        },
        {
            "individual": ["ne_star", "lbi"],
            "composite": "ne_star-lbi"
        }
    ]
}

In [None]:
# One record per (error type, sample, composite model, individual model).
composite_vs_individual_p_values = []

for error_type in error_types:
    for sample, models in composite_models.items():
        for model in models:
            composite_name = model["composite"]
            composite_dist = get_model_distances_by_build(df, sample, error_type, composite_name)

            for individual_model in model["individual"]:
                individual_dist = get_model_distances_by_build(df, sample, error_type, individual_model)

                # Subtract the individual model's distance to the future from the
                # composite model's, element by element, so that variation shared
                # by all models at a timepoint cancels out.
                # NOTE(review): this assumes both arrays list the same timepoints
                # in the same order -- confirm against the input table.
                difference_dist = pd.Series(composite_dist - individual_dist)

                # Bootstrap the mean paired difference.
                bootstrap_distribution = np.array([
                    difference_dist.sample(frac=1.0, replace=True).mean()
                    for _ in range(n_bootstraps)
                ])

                # Fraction of replicates in which the composite model is no
                # better than the individual model.
                n_not_better = (bootstrap_distribution >= 0).sum()
                p_value = n_not_better / float(n_bootstraps)

                record = {
                    "sample": sample,
                    "error_type": error_type,
                    "individual_model": individual_model,
                    "composite_model": composite_name,
                    "bootstrap_mean": bootstrap_distribution.mean(),
                    "bootstrap_std": bootstrap_distribution.std(),
                    "p_value": p_value
                }
                composite_vs_individual_p_values.append(record)

In [None]:
# Build the comparison table and sort it in descending order by sample,
# error type, composite model, and individual model.
composite_vs_individual_p_values_df = pd.DataFrame(composite_vs_individual_p_values).sort_values(
    ["sample", "error_type", "composite_model", "individual_model"],
    ascending=False
)

In [None]:
# Replace individual model identifiers with their display names.
composite_vs_individual_p_values_df["individual_model"] = composite_vs_individual_p_values_df["individual_model"].map(
    name_by_predictor
)

In [None]:
# Replace composite model identifiers with their display names, written on a
# single line (the display names contain newlines for axis labels).
composite_vs_individual_p_values_df["composite_model"] = composite_vs_individual_p_values_df["composite_model"].map(
    name_by_predictor
).apply(lambda name: name.replace("\n", " "))

In [None]:
# Replace sample identifiers with their display names, dropping the trailing
# " populations" to keep the table compact.
composite_vs_individual_p_values_df["sample"] = composite_vs_individual_p_values_df["sample"].map(
    name_by_sample
).apply(lambda name: name.replace(" populations", ""))

In [None]:
# Report bootstrap means to two decimal places.
composite_vs_individual_p_values_df["bootstrap_mean"] = (
    composite_vs_individual_p_values_df["bootstrap_mean"].round(2)
)

In [None]:
# Report bootstrap standard deviations to two decimal places.
composite_vs_individual_p_values_df["bootstrap_std"] = (
    composite_vs_individual_p_values_df["bootstrap_std"].round(2)
)

In [None]:
# Format p values as strings. A p value of exactly zero means no bootstrap
# replicate was >= 0, so it is reported as an upper bound of 1 / n_bootstraps,
# with the "<" wrapped in "$" so LaTeX typesets it in math mode.
composite_vs_individual_p_values_df["p_value"] = composite_vs_individual_p_values_df["p_value"].apply(
    lambda p_value: f"$<${1.0 / n_bootstraps}" if p_value == 0.0 else str(p_value)
)

In [None]:
# Display the formatted comparison table.
composite_vs_individual_p_values_df

In [None]:
# Preview the LaTeX table. Any dollar signs that the LaTeX export escaped as
# "\$" are restored so the "$<$" p-value markers stay in math mode. The
# backslash is written as "\\" because "\$" is not a valid Python escape
# sequence and triggers a warning on recent Python versions.
print(composite_vs_individual_p_values_df.to_latex().replace("\\$", "$"))

In [None]:
# Write the LaTeX table without the row index. Any dollar signs that the LaTeX
# export escaped as "\$" are restored so the "$<$" p-value markers stay in
# math mode. The backslash is written as "\\" because "\$" is not a valid
# Python escape sequence and triggers a warning on recent Python versions.
with open(composite_vs_individual_model_table, "w") as oh:
    oh.write(composite_vs_individual_p_values_df.to_latex(index=False).replace("\\$", "$"))