In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

%matplotlib inline

In [None]:
# Apply seaborn's "ticks" style.
sns.set_style("ticks")

## Compare distances

Compare the model distances between the estimated and observed future populations when the estimated populations depend on different data delay conditions. We expect the "no delay" scenario to be closest to the true future on average, followed by the "ideal" and then the "realistic" conditions.

In [None]:
# Load the tab-delimited distances table, parsing both timepoint columns as
# dates.
timepoint_columns = ["initial_timepoint", "future_timepoint"]
distances = pd.read_csv(snakemake.input.distances, sep="\t", parse_dates=timepoint_columns)

In [None]:
distances

In [None]:
# Collect the distinct delay types present in the distances table.
delay_types = set(distances["delay_type"].unique())

In [None]:
delay_types

In [None]:
# Any delay type that is neither "none" nor "ideal" is treated as the
# realistic condition.
candidate_delay_types = sorted(delay_types - {"none", "ideal"}, key=str)

if not candidate_delay_types:
    # Fail with an explanation instead of an opaque IndexError.
    raise ValueError(
        "Expected a delay type other than 'none' and 'ideal' in the distances table, "
        f"but only found: {sorted(delay_types, key=str)}"
    )

# Set iteration order can change between interpreter runs, so take the first
# candidate in sorted order to keep the choice reproducible when more than one
# candidate is present.
realistic_delay_type = candidate_delay_types[0]

In [None]:
realistic_delay_type

In [None]:
# Summarize the distances for each combination of forecast horizon and delay
# type, keeping the groups in their order of first appearance.
distance_statistics = ["mean", "median", "std"]
distances_by_horizon_and_delay = distances.groupby(
    ["horizon", "delay_type"],
    sort=False,
)
summary_distances_by_delay_and_horizon = distances_by_horizon_and_delay.agg(
    {"distance": distance_statistics}
).round(2)

In [None]:
summary_distances_by_delay_and_horizon[("distance", "mean")]

In [None]:
summary_distances_by_delay_and_horizon.columns

In [None]:
# Flatten the two-level column labels by keeping only their second level
# (the statistic names).
summary_distances_by_delay_and_horizon.columns = (
    summary_distances_by_delay_and_horizon.columns.get_level_values(1).tolist()
)

In [None]:
summary_distances_by_delay_and_horizon

In [None]:
def _format_mean_and_std(summary_row):
    """Render one summary row as "<mean> +/- <std>" with two decimal places."""
    return f"{summary_row['mean']:.2f} +/- {summary_row['std']:.2f}"


# Build the text shown in each cell of the summary table.
summary_distances_by_delay_and_horizon["mean_std"] = (
    summary_distances_by_delay_and_horizon.apply(_format_mean_and_std, axis=1)
)

In [None]:
summary_distances_by_delay_and_horizon

In [None]:
# Reshape the summary so that each horizon is a row and each delay type is a
# column holding the formatted "mean +/- std" text. The aggregation function
# returns its input unchanged, and `sort=False` asks pandas not to sort the
# result.
# NOTE(review): this presumably relies on each (horizon, delay_type) pair
# occurring exactly once in the summary -- confirm against the groupby that
# built it.
summary_distances_by_delay_and_horizon = summary_distances_by_delay_and_horizon.pivot_table(
    values=["mean_std"],
    index=["horizon"],
    columns=["delay_type"],
    aggfunc=lambda value: value,
    sort=False,
)

In [None]:
summary_distances_by_delay_and_horizon

In [None]:
# Flatten the column labels by keeping only the second element of each label.
flattened_column_names = []
for column_label in summary_distances_by_delay_and_horizon.columns:
    flattened_column_names.append(column_label[1])

summary_distances_by_delay_and_horizon.columns = flattened_column_names

In [None]:
# Turn the index back into an ordinary column so that it appears alongside the
# other columns of the table.
summary_distances_by_delay_and_horizon = summary_distances_by_delay_and_horizon.reset_index()

In [None]:
summary_distances_by_delay_and_horizon

In [None]:
# Build the LaTeX templates for the summary table. The last column is labelled
# and filled from whichever delay type was identified as the realistic one, so
# the row template always refers to a column that exists in the summary table
# (not only when that type is named "observed" or "realistic").
realistic_delay_name = str(realistic_delay_type)

# Capitalize only the first character and escape underscores, which are not
# allowed in LaTeX text mode. "observed" becomes "Observed delay" and
# "realistic" becomes "Realistic delay".
realistic_delay_label = (
    realistic_delay_name[:1].upper() + realistic_delay_name[1:]
).replace("_", r"\_") + " delay"

# The header is a raw string full of LaTeX braces, so the label is inserted by
# replacing a placeholder instead of using `str.format`.
table_template_header = r"""
\begin{tabular*}{0.7\textwidth}{rrrr}
\toprule
          & \multicolumn{3}{c}{Distance to future (mean +/- std dev AAs)} \\
  Horizon & No delay & Ideal delay & @@REALISTIC_DELAY_LABEL@@ \\
\midrule
""".replace("@@REALISTIC_DELAY_LABEL@@", realistic_delay_label)

# Each row is rendered later with `str.format`, using the summary table's
# column names as field names.
table_template_row = "{horizon} & {none} & {ideal} & {" + realistic_delay_name + r"} \\"

table_template_footer = r"""
\bottomrule
\end{tabular*}
"""

In [None]:
# Write the LaTeX table: header, one formatted row per horizon, then footer,
# each followed by a newline.
with open(snakemake.output.distances_summary_table, "w", encoding="utf-8") as table_handle:
    print(table_template_header, file=table_handle)

    table_records = summary_distances_by_delay_and_horizon.to_dict(orient="records")
    for table_record in table_records:
        print(table_template_row.format(**table_record), file=table_handle)

    print(table_template_footer, file=table_handle)

In [None]:
# Both layers plot the same variables with the same ordering of delay types.
delay_type_order = ("none", "ideal", realistic_delay_type)
shared_plot_arguments = {
    "x": "horizon",
    "y": "distance",
    "hue": "delay_type",
    "hue_order": delay_type_order,
    "data": distances,
}

fig, ax = plt.subplots(1, 1, figsize=(6, 4), dpi=200)

# Gray boxes with zero-size outlier markers, overlaid with the individual
# distances as semi-transparent points separated by delay type.
sns.boxplot(color="#CCCCCC", fliersize=0, ax=ax, **shared_plot_arguments)
sns.stripplot(alpha=0.35, dodge=True, ax=ax, **shared_plot_arguments)

ax.set_ylim(bottom=0)

# Skip one leading legend entry per delay type (presumably the ones added by
# the box plot) so that only the remaining entries are shown.
handles, labels = ax.get_legend_handles_labels()
number_of_delay_types = len(delay_type_order)

ax.legend(
    handles=handles[number_of_delay_types:],
    labels=labels[number_of_delay_types:],
    loc="upper left",
    title="Type of delay",
    frameon=False,
)
ax.set_xlabel("Forecast horizon (months)")
ax.set_ylabel("Distance to the future (AAs)")

sns.despine()
plt.tight_layout()

plt.savefig(snakemake.output.distances_figure)