In [None]:
from keyname import keyname as kn
import matplotlib.pyplot as plt
from nbmetalog import nbmetalog as nbm
import numpy as np
import pandas as pd
import ptitprince as pt
import seaborn as sns
from teeplot import teeplot


In [None]:
# Log notebook runtime metadata for provenance; the fields reported are
# whatever nbmetalog's print_metadata emits.
nbm.print_metadata()


In [None]:
# Download the dataset, then XOR-fold the per-row hashes into one value and
# print it in hex (presumably as a fingerprint to compare across runs).
df = pd.read_csv("https://osf.io/ck47r/download")
row_hashes = pd.util.hash_pandas_object(df)
dfdigest = np.bitwise_xor.reduce(row_hashes)
print(f"{dfdigest:x}")
df


In [None]:
# Column names describing how each reconstruction was performed.
methodological_variables = [
    # "resolution",
    "subsampling-fraction",
    "trie-postprocess",  # pick one based on bias/accuracy
]

# Column names describing the evolutionary scenario that was simulated.
evolutionary_variables = [
    "mut_distn",  # sensitivity analysis
    "num_generations",  # sensitivity analysis
    "num_islands",
    "num_niches",
    "p_island_migration",  # sensitivity analysis
    "p_niche_invasion",  # sensitivity analysis
    "population_size",  # doesn't change
    "tournament_size",
]


In [None]:
# Keep only rows with no subsampling (fraction 1.0) and the 'naive' trie
# postprocess. Take an explicit copy: later cells add columns to `df`, and
# assigning into a boolean-mask slice of the loaded frame triggers pandas'
# SettingWithCopyWarning.
df = df[
    (df['subsampling-fraction'] == 1.0) &
    (df['trie-postprocess'] == 'naive')
].copy()


In [None]:
# Columns that are varied for sensitivity analysis.
sensitivity_analysis_variables = ["epoch", "mut_distn"]

# Collapse each row's sensitivity-analysis values into a single column by
# passing the row to kn.pack (presumably yields one packed key=value string
# per row; confirm against keyname).
df["sensitivity_analysis_variables"] = df[sensitivity_analysis_variables].apply(
    kn.pack,
    axis="columns",
)


In [None]:
# Columns whose values, taken together, identify an evolutionary regime.
regime_key_columns = [
    "num_islands",
    "num_niches",
    "tournament_size",
    "p_niche_invasion",
]

# Regime label for each (num_islands, num_niches, tournament_size,
# p_niche_invasion) combination. Built once here rather than inside the
# per-row callback. Lookup is by exact equality, so the float
# p_niche_invasion values must match the parsed data exactly.
regime_by_parameters = {
    (1, 1, 2, 3.0517578125e-08): "plain",
    (1, 1, 1, 3.0517578125e-08): "neutral_selection",
    (1, 1, 8, 3.0517578125e-08): "strong_selection",
    (1, 4, 2, 3.0517578125e-06): "weak_4_niche_ecology",
    (1, 4, 2, 3.0517578125e-08): "4_niche_ecology",
    (1, 8, 2, 3.0517578125e-08): "8_niche_ecology",
    (1024, 1, 2, 3.0517578125e-08): "spatial_structure",
}

# Label every row; parameter combinations absent from the table get NaN.
df["regime"] = df.apply(
    lambda row: regime_by_parameters.get(
        tuple(row[regime_key_columns]),
        np.nan,  # default
    ),
    axis="columns",
)


In [None]:
# One figure per plot type and per (sensitivity variables, regime) group,
# showing quartet distance against resolution. The group's key values are
# passed to teeplot as output attributes.
group_columns = sensitivity_analysis_variables + ['regime']
for viz in (sns.barplot, pt.RainCloud):
    for group_key, group_df in df.groupby(group_columns):
        teeplot.tee(
            viz,
            data=group_df,
            x='resolution',
            y='quartet_distance',
            teeplot_outattrs=dict(zip(group_columns, group_key)),
        )
        plt.show()
