In [None]:
import os

from keyname import keyname as kn
import matplotlib.pyplot as plt
from nbmetalog import nbmetalog as nbm
import numpy as np
import os
import pandas as pd
import seaborn as sns
from teeplot import teeplot


In [None]:
# Print metadata about the notebook runtime (nbmetalog helper).
nbm.print_metadata()


In [None]:
# Fetch the dataset straight from its OSF download URL.
df = pd.read_csv("https://osf.io/ck47r/download")

# Fold the per-row hashes together with XOR into a single digest and print it
# in hexadecimal, so the exact data that was loaded can be identified.
dfdigest = np.bitwise_xor.reduce(pd.util.hash_pandas_object(df))
print(f"{dfdigest:x}")

# Last expression of the cell: show the loaded frame.
df


In [None]:
# Names of the columns describing evolutionary conditions.
# Per-entry notes mark which ones are part of the sensitivity analysis and
# which one does not change.
evolutionary_variables = [
    # sensitivity analysis
    "mut_distn",
    # sensitivity analysis
    "num_generations",
    "num_islands",
    "num_niches",
    # sensitivity analysis
    "p_island_migration",
    # sensitivity analysis
    "p_niche_invasion",
    # doesn't change
    "population_size",
    "tournament_size",
]


In [None]:
# Keep only rows with subsampling fraction 1.0 and the "naive" trie
# postprocess; copy so later column assignments act on an independent frame.
df = df.loc[
    df["subsampling-fraction"].eq(1.0) & df["trie-postprocess"].eq("naive")
].copy()


In [None]:
# Lookup table from
#   (num_islands, num_niches, tournament_size, p_niche_invasion)
# to a human-readable regime label. Defined once here instead of being
# re-created inside a lambda for every row of the frame.
# NOTE(review): the last key component is matched by exact float equality,
# so the values parsed from the CSV must equal these literals bit-for-bit.
regime_by_condition = {
    (1, 1, 2, 3.0517578125e-08): "plain",
    (1, 1, 1, 3.0517578125e-08): "neutral selection",
    (1, 1, 8, 3.0517578125e-08): "strong selection",
    (1, 4, 2, 3.0517578125e-06): "weak 4 niche ecology",
    (1, 4, 2, 3.0517578125e-08): "4 niche ecology",
    (1, 8, 2, 3.0517578125e-08): "8 niche ecology",
    (1024, 1, 2, 3.0517578125e-08): "spatial structure",
}

# Label every row by looking up its condition tuple; combinations that are
# not in the table get NaN. Walking the four key columns in lockstep avoids
# building a Series per row (as row-wise DataFrame.apply does) and yields
# exactly one label per row, including when the frame has no rows.
df["regime"] = [
    regime_by_condition.get(condition, np.nan)
    for condition in zip(
        df["num_islands"],
        df["num_niches"],
        df["tournament_size"],
        df["p_niche_invasion"],
    )
]


In [None]:
# Lookup table from (value of column "a", value of column "resolution") to a
# reconstruction-quality label. Defined once here instead of being re-created
# inside a lambda for every row of the frame.
quality_by_source = {
    ("reconstructed-tree", 3.0): "33% resolution",
    ("reconstructed-tree", 10.0): "10% resolution",
    ("reconstructed-tree", 30.0): "3% resolution",
    ("reconstructed-tree", 100.0): "1% resolution",
    ("collapsed-phylogeny", 0): "perfect resolution",
}

# Label every row; pairs that are not in the table get NaN.
# A missing "resolution" is treated as 0 so that collapsed-phylogeny rows
# match their table entry; the fill is done once on the whole column rather
# than once per row. A missing "a" cannot match any key, so it needs no fill.
df["quality"] = [
    quality_by_source.get(source, np.nan)
    for source in zip(df["a"], df["resolution"].fillna(0))
]

# Discard rows that did not receive both a regime and a quality label.
df = df.dropna(axis="index", subset=["regime", "quality"]).copy()

# Last expression of the cell: show which quality labels remain.
df["quality"].unique()


In [None]:
# Columns that are combined into one per-row key.
sensitivity_analysis_variables = ["epoch", "mut_distn"]

# Apply kn.pack to each row of the selected columns and store the result in a
# single column, which can then be used as one grouping key.
df["sensitivity_analysis_variables"] = df.loc[
    :, sensitivity_analysis_variables
].apply(kn.pack, axis="columns")


In [None]:
# Summarize triplet distance (mean, median, std, max) within each
# regime / quality / sensitivity-analysis group.
stats = df.groupby(
    ["regime", "quality", "sensitivity_analysis_variables"], as_index=False
).agg(
    {
        "triplet_distance": ["mean", "median", "std", "max"],
    }
)

# Flatten the two-level column labels into "<column> <statistic>".
# The group-key columns have an empty second level, so a bare ' '.join would
# name them "regime ", "quality ", ... with a trailing space; strip it so the
# flattened names (and any file header written from them) are clean.
stats.columns = stats.columns.map(lambda levels: " ".join(levels).strip())

# Show the full table without truncating rows or cell contents.
with pd.option_context("display.max_colwidth", None, "display.max_rows", None):
    display(stats)


In [None]:
# Create the output directory if it is missing, then write the current
# `stats` table to CSV without the index column.
os.makedirs("outdata", exist_ok=True)
stats.to_csv(
    path_or_buf="outdata/a=tree-reconstruction-quality-triplet-summary-statistics.csv",
    index=False,
)


In [None]:
# Summarize quartet distance (mean, median, std, max) within each
# regime / quality / sensitivity-analysis group.
stats = df.groupby(
    ["regime", "quality", "sensitivity_analysis_variables"], as_index=False
).agg(
    {
        "quartet_distance": ["mean", "median", "std", "max"],
    }
)

# Flatten the two-level column labels into "<column> <statistic>".
# The group-key columns have an empty second level, so a bare ' '.join would
# name them "regime ", "quality ", ... with a trailing space; strip it so the
# flattened names (and any file header written from them) are clean.
stats.columns = stats.columns.map(lambda levels: " ".join(levels).strip())

# Show the full table without truncating rows or cell contents.
with pd.option_context("display.max_colwidth", None, "display.max_rows", None):
    display(stats)


In [None]:
# Create the output directory if it is missing, then write the current
# `stats` table to CSV without the index column.
os.makedirs("outdata", exist_ok=True)
stats.to_csv(
    path_or_buf="outdata/a=tree-reconstruction-quality-quartet-summary-statistics.csv",
    index=False,
)
