In [None]:
%load_ext autoreload
%autoreload 0

In [None]:
%autoreload

In [None]:
import sys
# NOTE(review): hard-coded, machine-specific absolute path. Presumably this is what
# makes `import sfacts` resolve to the StrainFacts checkout bundled with the
# project -- confirm, and consider a path relative to the repository root.
sys.path.append('/pollard/home/bsmith/Projects/haplo-benchmark/include/StrainFacts')

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
from lib.pandas_util import idxwhere
import sfacts as sf
import numpy as np
import xarray as xr
import seaborn as sns
from scipy.spatial.distance import cdist
import lib.plot
from tqdm import tqdm
from itertools import product
import lib.stats

def pvalue_to_significance_marker(p):
    """Translate a p-value into the marker annotated on a plot.

    Parameters
    ----------
    p : float
        The p-value to classify.

    Returns
    -------
    str
        '**' when p < 1e-3, '*' when p < 0.05, and '' otherwise.

    A NaN p-value fails both comparisons and therefore also yields ''
    (rather than falling off the end of the function and returning None).
    """
    if p < 1e-3:
        return '**'
    if p < 0.05:
        return '*'
    return ''

In [None]:
#"data/sfacts_simulate-model_simplest_simulation-n{n}-g250-s{sim_s}-pi40-mu100-eps10-seed{sim_seed}.metagenotype-n{n}-g250.fit-sfacts41_gpu-s{fit_s}-seed{fit_seed}.evaluation.tsv"
n_to_s_ratio = 5


def _load_evaluations(grid, fit_params_in_path=True):
    """Read every evaluation table that exists for a grid of runs.

    Parameters
    ----------
    grid : iterable of tuples
        Each tuple is (g, s, fit_type, sim_seed, fit_seed, fit_s_ratio).
    fit_params_in_path : bool, default True
        When True the file name carries "-s{fit_s}-seed{fit_seed}" after the
        fit type and fit_s is int(s * fit_s_ratio).  When False the file name
        ends right after the fit type and fit_s is recorded as NaN.

    Returns
    -------
    list of pandas.DataFrame
        One table per file found (indexed by 'fit_path'), each with the run
        parameters appended as constant columns.  Missing files are skipped.
    """
    tables = []
    for g, s, fit_type, sim_seed, fit_seed, fit_s_ratio in tqdm(list(grid)):
        n = int(s * n_to_s_ratio)
        sim_prefix = f"data/sfacts_simulate-model_simplest_simulation-n{n}-g{g}-s{s}-pi40-mu100-eps10-seed{sim_seed}"
        mgen_prefix = f"{sim_prefix}.metagenotype-n{n}-g{g}"
        if fit_params_in_path:
            fit_s = int(s * fit_s_ratio)
            eval_path = f"{mgen_prefix}.fit-{fit_type}-s{fit_s}-seed{fit_seed}.evaluation.tsv"
        else:
            fit_s = np.nan
            eval_path = f"{mgen_prefix}.fit-{fit_type}.evaluation.tsv"
        try:
            bench = pd.read_table(eval_path, index_col='fit_path')
        except FileNotFoundError:
            # Not every grid point has an output file; skip the gaps.
            continue
        tables.append(bench.assign(
            sim_seed=sim_seed,
            fit_seed=fit_seed,
            n=n,
            g=g,
            s=s,
            fit_s_ratio=fit_s_ratio,
            fit_s=fit_s,
            fit_type=fit_type,
        ))
    return tables


_benchmarks = (
    # Tools whose output file names include the fitted strain count and fit seed.
    _load_evaluations(product(
        [250, 1000],
        [10, 20, 40, 80, 200, 500],
        ['sfinder', 'sfacts44_gpu', 'sfacts44_cpu'],
        range(5),
        range(5),
        [0.5, 0.8, 1, 1.5],
    ))
    # mixtureS: a single grid point per sim_seed, file names without fit parameters.
    + _load_evaluations(product(
        [250],
        [40],
        ['mixtureS'],
        range(5),
        [0],
        [1.0],
    ), fit_params_in_path=False)
)

if not _benchmarks:
    # pd.concat([]) would fail with an unhelpful "No objects to concatenate".
    raise FileNotFoundError(
        "No *.evaluation.tsv files were found under data/ for the requested grid."
    )

_benchmarks = pd.concat(_benchmarks)
print(_benchmarks.shape)
benchmarks2 = _benchmarks

In [None]:
#"data/sfacts_simulate-model_simplest_simulation-n{n}-g250-s{sim_s}-pi40-mu100-eps10-seed{sim_seed}.metagenotype-n{n}-g250.fit-sfacts41_gpu-s{fit_s}-seed{fit_seed}.evaluation.tsv"
n_to_s_ratio = 5

# Collect one runtime record per fit for which a "*.benchmark" file exists.
_benchmarks = []
for g, s, fit_type, sim_seed, fit_seed, fit_s_ratio in tqdm(list(product(
    [250, 1000],
    [10, 20, 40, 80, 200, 500],
    ['sfinder', 'sfacts44_gpu', 'sfacts44_cpu', 'mixtureS'],
    range(5),
    range(5),
    [0.5, 0.8, 1, 1.5],
))):
    n = int(s * n_to_s_ratio)
    fit_s = int(s * fit_s_ratio)
    sim_prefix = f"data/sfacts_simulate-model_simplest_simulation-n{n}-g{g}-s{s}-pi40-mu100-eps10-seed{sim_seed}"
    mgen_prefix = f"{sim_prefix}.metagenotype-n{n}-g{g}"
    eval_path = f"{mgen_prefix}.fit-{fit_type}-s{fit_s}-seed{fit_seed}.benchmark"
    try:
        bench = pd.read_table(eval_path)
    except FileNotFoundError:
        # Not every grid point has been run; skip the gaps.
        continue

    # Keep only the 's' column, renamed to 'runtime_s'
    # (presumably elapsed seconds -- confirm against whatever writes these files),
    # and tag the record with the parameters of the run.
    _benchmarks.append(
        bench['s']
        .to_frame(name='runtime_s')
        .assign(
            sim_seed=sim_seed,
            fit_seed=fit_seed,
            n=n,
            g=g,
            s=s,
            fit_s_ratio=fit_s_ratio,
            fit_s=fit_s,
            fit_type=fit_type,
        )
    )

if not _benchmarks:
    # pd.concat([]) would fail with an unhelpful "No objects to concatenate".
    raise FileNotFoundError(
        "No *.benchmark files were found under data/ for the requested grid."
    )

_benchmarks = pd.concat(_benchmarks)
print(_benchmarks.shape)
benchmarks1 = _benchmarks

In [None]:
# Number of runtime records for each (n, fit_s_ratio) row, one column per fit_type.
# GroupBy.size() counts rows per group directly, replacing .apply(len).
(
    benchmarks1
    .groupby(['n', 'fit_type', 'fit_s_ratio'])
    .size()
    .unstack('fit_type')
)

In [None]:
# Mean-runtime scaling curves.
# Step 1 summarises runtime across the fit_seed replicates of every
# (n, fit_type, sim_seed, fit_s_ratio) combination; step 2 summarises those
# summaries across sim_seed.  d0 therefore has two-level columns, e.g.
# ('mean', 'mean') is the mean over sim_seed of the per-sim_seed mean runtime.

palette = {
    'sfacts44_cpu': 'tab:purple',
    'sfacts44_gpu': 'tab:blue',
    'sfinder': 'tab:green',
}

fit_label_map = {
    'sfacts44_gpu': 'StrainFacts (GPU)',
    'sfacts44_cpu': 'StrainFacts (CPU)',
    'sfinder': 'Strain Finder (CPU)',
}

per_sim_seed = (
    benchmarks1
    .groupby(['n', 'fit_type', 'sim_seed', 'fit_s_ratio'])
    .runtime_s
    .agg(['mean', 'max', 'min', 'std', 'count'])
    .reset_index()
)
d0 = (
    per_sim_seed
    .groupby(['n', 'fit_type', 'fit_s_ratio'])
    .agg(['mean', 'max', 'median', 'count'])
    .reset_index()
)
# Exclude the Strain Finder row at n == 1000, fit_s_ratio == 1
# (the original note calls it "the one sfinder run").
is_excluded = (d0.n == 1_000) & (d0.fit_s_ratio == 1) & (d0.fit_type == 'sfinder')
d0 = d0.loc[~is_excluded]

fig = plt.figure()

# Solid lines: fit_s_ratio == 1, one labelled curve per tool.
ratio_1 = d0[d0.fit_s_ratio == 1].sort_values('n')
for fit_type in ['sfinder', 'sfacts44_cpu', 'sfacts44_gpu']:
    d1 = ratio_1[ratio_1.fit_type == fit_type]
    if fit_type not in palette:
        continue
    plt.plot(
        d1['n'],
        d1[('mean', 'mean')],
        c=palette[fit_type],
        label=fit_label_map[fit_type],
    )

# Dashed lines: fit_s_ratio == 1.5, same colours, kept out of the legend.
ratio_1_5 = d0[d0.fit_s_ratio == 1.5].sort_values('n')
for fit_type, d1 in ratio_1_5.groupby('fit_type'):
    if fit_type not in palette:
        continue
    plt.plot(
        d1['n'],
        d1[('mean', 'mean')],
        c=palette[fit_type],
        linestyle='--',
        label='__nolegend__',
    )

# Empty grey lines exist only to explain the two line styles in the legend.
for linestyle, label in [('-', '1x strains'), ('--', '1.5x strains')]:
    plt.plot([], [], c='grey', linestyle=linestyle, label=label)

plt.yscale('log')
plt.xscale('log')
plt.legend()

plt.ylabel('mean runtime (sec)')
plt.xlabel('samples (N)')

In [None]:
# sfacts44_cpu runtimes in hours (seconds / 3600), using the two-level summary d0
# from the mean-runtime cell: max over sim_seed of the per-sim_seed max, and mean of the means.
(
    d0[d0.fit_type == 'sfacts44_cpu']
    .set_index(['n', 'fit_s_ratio'])
    [[('max', 'max'), ('mean', 'mean')]]
    / 3600
)

In [None]:
# sfacts44_gpu runtimes in hours (seconds / 3600), using the two-level summary d0
# from the mean-runtime cell: max over sim_seed of the per-sim_seed max, and mean of the means.
(
    d0[d0.fit_type == 'sfacts44_gpu']
    .set_index(['n', 'fit_s_ratio'])
    [[('max', 'max'), ('mean', 'mean')]]
    / 3600
)

In [None]:
# sfinder runtimes in hours (seconds / 3600), using the two-level summary d0
# from the mean-runtime cell: max over sim_seed of the per-sim_seed max, and mean of the means.
(
    d0[d0.fit_type == 'sfinder']
    .set_index(['n', 'fit_s_ratio'])
    [[('max', 'max'), ('mean', 'mean')]]
    / 3600
)

In [None]:
# Median-runtime scaling curves, saved to fig/runtime_profiling.pdf.
# The median is taken over every record (all sim_seed and fit_seed values pooled)
# within each (n, fit_type, fit_s_ratio) group.

fit_type_list = ['sfinder', 'sfacts44_cpu', 'sfacts44_gpu']

palette = {
    'sfacts44_cpu': 'tab:blue',
    'sfacts44_gpu': 'tab:purple',
    'sfinder': 'tab:green',
}

fit_label_map = {
    'sfacts44_gpu': 'StrainFacts (GPU)',
    'sfacts44_cpu': 'StrainFacts (CPU)',
    'sfinder': 'Strain Finder (CPU)',
}

d0 = (
    benchmarks1
    .groupby(['n', 'fit_type', 'fit_s_ratio'])
    .runtime_s
    .agg(['median'])
    .reset_index()
)
# Exclude the Strain Finder row at n == 1000, fit_s_ratio == 1
# (the original note calls it "the one sfinder run").
is_excluded = (d0.n == 1_000) & (d0.fit_s_ratio == 1) & (d0.fit_type == 'sfinder')
d0 = d0.loc[~is_excluded]

fig = plt.figure()

# Solid curves (fit_s_ratio == 1) carry the tool labels; dashed curves
# (fit_s_ratio == 1.5) reuse the colours and are hidden from the legend.
for ratio, linestyle in [(1, '-'), (1.5, '--')]:
    at_ratio = d0[d0.fit_s_ratio == ratio].sort_values('n')
    for fit_type in fit_type_list:
        d1 = at_ratio[at_ratio.fit_type == fit_type]
        if fit_type not in palette:
            continue
        label = fit_label_map[fit_type] if linestyle == '-' else '__nolegend__'
        plt.plot(
            d1['n'],
            d1['median'],
            c=palette[fit_type],
            linestyle=linestyle,
            label=label,
            lw=2,
        )

# Empty grey lines exist only to explain the two line styles in the legend.
for linestyle, label in [('-', '1x strains'), ('--', '1.5x strains')]:
    plt.plot([], [], c='grey', linestyle=linestyle, label=label, lw=2)

plt.yscale('log')
plt.xscale('log')
plt.legend()

plt.ylabel('median runtime (sec)')
plt.xlabel('samples (N)')

plt.savefig('fig/runtime_profiling.pdf', dpi=400)

In [None]:
# Median-runtime curve for Strain Finder alone at fit_s_ratio == 1, on fixed axis
# limits; saved to fig/runtime_profiling_just_sfinder_1x.png.
# The median pools every record within each (n, fit_type, fit_s_ratio) group.

fit_type_list = ['sfinder']

palette = {
    'sfacts44_cpu': 'tab:blue',
    'sfacts44_gpu': 'tab:purple',
    'sfinder': 'tab:green',
}

fit_label_map = {
    'sfacts44_gpu': 'StrainFacts (GPU)',
    'sfacts44_cpu': 'StrainFacts (CPU)',
    'sfinder': 'Strain Finder',
}

d0 = (
    benchmarks1
    .groupby(['n', 'fit_type', 'fit_s_ratio'])
    .runtime_s
    .agg(['median'])
    .reset_index()
)
# Exclude the Strain Finder row at n == 1000, fit_s_ratio == 1
# (the original note calls it "the one sfinder run").
is_excluded = (d0.n == 1_000) & (d0.fit_s_ratio == 1) & (d0.fit_type == 'sfinder')
d0 = d0.loc[~is_excluded]

fig = plt.figure()

ratio_1 = d0[d0.fit_s_ratio == 1].sort_values('n')
for fit_type in fit_type_list:
    d1 = ratio_1[ratio_1.fit_type == fit_type]
    if fit_type not in palette:
        continue
    plt.plot(
        d1['n'],
        d1['median'],
        c=palette[fit_type],
        linestyle='-',
        label=fit_label_map[fit_type],
        lw=2,
    )

plt.yscale('log')
plt.xscale('log')
# Explicit limits, identical to those of the other fixed-limit runtime figures.
plt.xlim(2e1, 1e4)
plt.ylim(1e1, 1e6)

plt.legend()

plt.ylabel('median runtime (sec)')
plt.xlabel('samples (N)')

plt.savefig('fig/runtime_profiling_just_sfinder_1x.png', dpi=400)

In [None]:
# Median-runtime curves for Strain Finder and StrainFacts (CPU) at fit_s_ratio == 1,
# on fixed axis limits; saved to fig/runtime_profiling_both_1x.png.
# The median pools every record within each (n, fit_type, fit_s_ratio) group.

fit_type_list = ['sfinder', 'sfacts44_cpu', ]

palette = {
    'sfacts44_cpu': 'tab:blue',
    'sfacts44_gpu': 'tab:purple',
    'sfinder': 'tab:green',
}

fit_label_map = {
    'sfacts44_gpu': 'StrainFacts (GPU)',
    'sfacts44_cpu': 'StrainFacts (CPU)',
    'sfinder': 'Strain Finder',
}

d0 = (
    benchmarks1
    .groupby(['n', 'fit_type', 'fit_s_ratio'])
    .runtime_s
    .agg(['median'])
    .reset_index()
)
# Exclude the Strain Finder row at n == 1000, fit_s_ratio == 1
# (the original note calls it "the one sfinder run").
is_excluded = (d0.n == 1_000) & (d0.fit_s_ratio == 1) & (d0.fit_type == 'sfinder')
d0 = d0.loc[~is_excluded]

fig = plt.figure()

ratio_1 = d0[d0.fit_s_ratio == 1].sort_values('n')
for fit_type in fit_type_list:
    d1 = ratio_1[ratio_1.fit_type == fit_type]
    if fit_type not in palette:
        continue
    plt.plot(
        d1['n'],
        d1['median'],
        c=palette[fit_type],
        linestyle='-',
        label=fit_label_map[fit_type],
        lw=2,
    )

plt.yscale('log')
plt.xscale('log')
# Explicit limits, identical to those of the other fixed-limit runtime figures.
plt.xlim(2e1, 1e4)
plt.ylim(1e1, 1e6)

plt.legend()

plt.ylabel('median runtime (sec)')
plt.xlabel('samples (N)')

plt.savefig('fig/runtime_profiling_both_1x.png', dpi=400)

In [None]:
# Median-runtime curves for all three tools at fit_s_ratio == 1, on fixed axis
# limits; saved to fig/runtime_profiling_all_1x.png.
# The median pools every record within each (n, fit_type, fit_s_ratio) group.

fit_type_list = ['sfinder', 'sfacts44_cpu', 'sfacts44_gpu']

palette = {
    'sfacts44_cpu': 'tab:blue',
    'sfacts44_gpu': 'tab:purple',
    'sfinder': 'tab:green',
}

fit_label_map = {
    'sfacts44_gpu': 'StrainFacts (GPU)',
    'sfacts44_cpu': 'StrainFacts (CPU)',
    'sfinder': 'Strain Finder',
}

d0 = (
    benchmarks1
    .groupby(['n', 'fit_type', 'fit_s_ratio'])
    .runtime_s
    .agg(['median'])
    .reset_index()
)
# Exclude the Strain Finder row at n == 1000, fit_s_ratio == 1
# (the original note calls it "the one sfinder run").
is_excluded = (d0.n == 1_000) & (d0.fit_s_ratio == 1) & (d0.fit_type == 'sfinder')
d0 = d0.loc[~is_excluded]

fig = plt.figure()

ratio_1 = d0[d0.fit_s_ratio == 1].sort_values('n')
for fit_type in fit_type_list:
    d1 = ratio_1[ratio_1.fit_type == fit_type]
    if fit_type not in palette:
        continue
    plt.plot(
        d1['n'],
        d1['median'],
        c=palette[fit_type],
        linestyle='-',
        label=fit_label_map[fit_type],
        lw=2,
    )

plt.yscale('log')
plt.xscale('log')
# Explicit limits, identical to those of the other fixed-limit runtime figures.
plt.xlim(2e1, 1e4)
plt.ylim(1e1, 1e6)

plt.legend()

plt.ylabel('median runtime (sec)')
plt.xlabel('samples (N)')

plt.savefig('fig/runtime_profiling_all_1x.png', dpi=400)

In [None]:
# Runtime summary in minutes (runtime_s / 60) for every (n, fit_s_ratio, fit_type),
# pooling all sim_seed and fit_seed records of the group.
runtime_minutes = benchmarks1.assign(runtime_min=lambda x: x.runtime_s / 60)
(
    runtime_minutes
    .groupby(['n', 'fit_type', 'fit_s_ratio'])
    .runtime_min
    .agg(['median', 'mean', 'max', 'min', 'std', 'count'])
    .reorder_levels(['n', 'fit_s_ratio', 'fit_type'])
    .sort_index()
)

In [None]:
# Grid of strip plots: one row per accuracy metric, one column per fit_s_ratio,
# strains simulated (s) on the x-axis, tools as hue.
# For every (s, g, fit_type, sim_seed, fit_s_ratio) group the column-wise minimum
# over its rows (the fit_seed replicates) is plotted.
group_cols = ['s', 'g', 'fit_type', 'sim_seed', 'fit_s_ratio']
d0 = benchmarks2.groupby(group_cols, as_index=False).min()

# Insertion order doubles as the hue order below.
palette = {
    'sfacts44_cpu': 'purple',
    'sfacts44_gpu': 'blue',
    'sfinder': 'green',
}

# (metric column, y-scale keywords, y-limit keywords)
metric_list = [
    ("braycurtis_trans_error", dict(value='symlog', linthresh=1e-3, linscale=0.1), dict(bottom=-1e-4, top=1e0)),
    ("unifrac_cis_error", dict(value='symlog', linthresh=1e-3, linscale=0.1), dict(bottom=-1e-4, top=1e0)),
    ("rank_abundance_error", dict(value='symlog', linthresh=1e-3, linscale=0.1), dict(bottom=-1e-4, top=1e0)),
    ("community_entropy_error", dict(value='symlog', linthresh=1e-2, linscale=0.1), dict(bottom=-1e-3, top=1e0)),
    ("fwd_discrete_genotype_error", dict(value='symlog', linthresh=1e-3, linscale=0.1), dict(bottom=-1e-4, top=1e0)),
    ("rev_discrete_genotype_error", dict(value='symlog', linthresh=1e-3, linscale=0.1), dict(bottom=-1e-4, top=1e0)),
]

s_list = [10, 20, 40, 80, 200, 500]
fit_s_ratio_list = [0.8, 1, 1.5]
g = 250
ncol = len(fit_s_ratio_list)
nrow = len(metric_list)

fig, axs = plt.subplots(nrow, ncol, figsize=(5 * ncol, 2 * nrow), sharey='row', sharex=True)
axs = axs.reshape((nrow, ncol))

# Only runs with the chosen number of positions (g) are shown.
d0_at_g = d0[d0.g == g]

for (met, scale_kws, ylim_kws), row in zip(metric_list, axs):
    # The y-axis is shared along a row, so it is configured once on the left panel.
    left_ax = row[0]
    left_ax.set_yscale(**scale_kws)
    left_ax.set_ylim(**ylim_kws)
    left_ax.set_ylabel(met)
    for fit_s_ratio, ax in zip(fit_s_ratio_list, row):
        d1 = d0_at_g[d0_at_g.fit_s_ratio == fit_s_ratio]
        ax.set_title(f'fit_s_ratio={fit_s_ratio}')
        sns.stripplot(
            data=d1,
            x='s',
            y=met,
            order=s_list,
            hue='fit_type',
            hue_order=palette.keys(),
            palette=palette,
            s=6,
            alpha=0.7,
            jitter=True,
            dodge=True,
            ax=ax,
        )

# Keep the legend on the first panel only.
for ax in axs.flatten()[1:]:
    leg = ax.get_legend()
    if leg:
        leg.remove()

fig.tight_layout()

In [None]:
# Accuracy versus fitted-strain ratio for a single simulation size (s=40, g=250).
# Every remaining column of benchmarks2 is summarised across the rows (fit_seed
# replicates) of each (s, g, n, fit_type, fit_s_ratio, sim_seed) group, and those
# summaries are then averaged across sim_seed.  d0's columns are three-level:
# (metric, statistic over fit_seed, statistic over sim_seed).

# Insertion order determines the left-to-right offset of the tools at each ratio.
palette = {
    'sfacts44_cpu': 'purple',
    'sfinder': 'green',
}

d0 = (
    benchmarks2
    .groupby([
        's',
        'g',
        'n',
        'fit_type',
        'fit_s_ratio',
        'sim_seed',
    ], as_index=False)
    .agg(['mean', 'min', 'max', 'std', 'count'])
    .reset_index()
    .groupby([
        's',
        'g',
        'n',
        'fit_type',
        'fit_s_ratio',
    ])
    .agg(['mean', 'count'])
)

g = 250
s = 40

fit_type_list = list(palette.keys())

d1 = d0.reset_index()[lambda x: (x.s==s) & (x.fit_type.isin(fit_type_list)) & (x.g == g)]

# (metric column, y-scale keywords, y-limit keywords)
metric_list = [
    ("braycurtis_trans_error", dict(value='symlog', linthresh=1e-4, linscale=0.1), dict(bottom=-1e-4, top=1e0)),
    ("unifrac_cis_error", dict(value='symlog', linthresh=1e-3, linscale=0.1), dict(bottom=-1e-4, top=1e0)),
    ("community_entropy_error", dict(value='symlog', linthresh=1e-2, linscale=0.1), dict(bottom=-1e-3, top=1e0)),
    ("fwd_discrete_genotype_error", dict(value='symlog', linthresh=1e-4, linscale=0.1), dict(bottom=-1e-4, top=1e0)),
    ("rev_discrete_genotype_error", dict(value='symlog', linthresh=1e-4, linscale=0.1), dict(bottom=-1e-4, top=1e0)),
]

fig, axs = plt.subplots(1, len(metric_list), figsize=(2 * len(metric_list), 4))

for (met, scale_kws, ylim_kws), ax in zip(metric_list, axs):
    ax.set_ylabel(met)
    ax.set_yscale(**scale_kws)
    ax.set_ylim(**ylim_kws)
    ax.set_xticks([0.5, 0.8, 1, 1.5])
    ax.set_xticklabels(['0.5x', '0.8x', '1.0x', '1.5x'])
    # Group by the scalar label, not a one-element list: newer pandas yields 1-tuple
    # keys for a list grouper, and the key is used as a number for the x-position.
    for fit_s_ratio, d2 in d1.groupby('fit_s_ratio'):
        for fit_type, offset in zip(fit_type_list, np.linspace(-0.1, 0.1, num=len(fit_type_list))):
            d3 = d2[lambda x: (x.fit_type == fit_type)]
            if d3.empty:
                continue
            x_pos = [fit_s_ratio + offset]
            # Dot: mean over sim_seed of the per-sim_seed mean.
            ax.scatter(x_pos, d3[(met, 'mean', 'mean')], color=palette[fit_type])
            # Bar: from the mean per-sim_seed minimum to the mean per-sim_seed maximum.
            ax.vlines(x_pos, d3[(met, 'min', 'mean')], d3[(met, 'max', 'mean')], color=palette[fit_type])

fig.tight_layout()

In [None]:
# Per-simulation accuracy (best of the fit_seed replicates) versus fitted-strain
# ratio for s=40, g=250, with a significance marker per ratio.
# Every remaining column of benchmarks2 is summarised across the rows (fit_seed
# replicates) of each (s, g, n, fit_type, fit_s_ratio, sim_seed) group; d0's
# columns are two-level: (metric, statistic over fit_seed).

# Insertion order determines the left-to-right offset of the tools at each ratio.
palette = {
    'sfacts44_cpu': 'purple',
    'sfinder': 'green',
}

d0 = (
    benchmarks2
    .groupby([
        's',
        'g',
        'n',
        'fit_type',
        'fit_s_ratio',
        'sim_seed',
    ], as_index=False)
    .agg(['mean', 'min', 'max', 'std', 'count', 'median'])
    .reset_index()
)

# Horizontal jitter in [0, 1).  A seeded generator keeps the figure identical
# between runs instead of depending on the global NumPy random state.
d0['jitter'] = np.random.default_rng(0).random(d0.shape[0])

g = 250
s = 40

fit_type_list = list(palette.keys())

d1 = d0.reset_index()[lambda x: (x.s==s) & (x.fit_type.isin(fit_type_list)) & (x.g == g)]

# (metric column, y-scale keywords, y-limit keywords)
metric_list = [
    ("unifrac_cis_error", dict(value='symlog', linthresh=1e-2, linscale=0.1), dict(bottom=-1e-2, top=1e0)),
    ("community_entropy_error", dict(value='symlog', linthresh=1e-2, linscale=0.1), dict(bottom=-1e-2, top=1e0)),
    ("fwd_discrete_genotype_error", dict(value='symlog', linthresh=1e-4, linscale=0.1), dict(bottom=-1e-4, top=1e0)),
    ("rev_discrete_genotype_error", dict(value='symlog', linthresh=1e-4, linscale=0.1), dict(bottom=-1e-4, top=1e0)),
]

fig, axs = plt.subplots(1, len(metric_list), figsize=(2 * len(metric_list), 4), sharex=True)

for (met, scale_kws, ylim_kws), ax in zip(metric_list, axs):
    ax.set_ylabel(met)
    ax.set_yscale(**scale_kws)
    ax.set_ylim(**ylim_kws)
    ax.set_xticks([0.5, 0.8, 1, 1.5])
    ax.set_xticklabels(['0.5x', '0.8x', '1.0x', '1.5x'])
    # Group by the scalar label so the key is a plain number on every pandas
    # version (a one-element list grouper yields 1-tuple keys on newer pandas).
    for fit_s_ratio, d2 in d1.groupby('fit_s_ratio'):
        # Compare the tools on the per-sim_seed minimum of the metric.
        # mwu[1] is used as the p-value -- presumably the helper returns
        # (statistic, pvalue); confirm against lib.stats.
        mwu = lib.stats.mannwhitneyu('fit_type', (met, 'min'), data=d2.sort_values('sim_seed'))
        marker_x = d2['fit_s_ratio'].values[0] + 0.04
        print(met, fit_s_ratio, mwu, marker_x)
        ax.annotate(
            pvalue_to_significance_marker(mwu[1]),
            xy=(marker_x, 0.87),  # data coordinates
            ha='center', va='top', fontsize=13)
        for fit_type, offset in zip(fit_type_list, np.linspace(-1, 1, num=len(fit_type_list))):
            d3 = d2[d2.fit_type == fit_type]
            if d3.empty:
                continue
            ax.scatter(
                d3['fit_s_ratio'] + d3['jitter'] * 0.07 + offset * 0.1,
                d3[(met, 'min')],
                edgecolor='lightgrey',
                color=palette[fit_type],
                alpha=0.7,
            )

fig.tight_layout()

In [None]:
# Per-simulation accuracy versus fitted-strain ratio for s=40, g=250: each point is
# the minimum over fit_seed replicates, with a line up to the median over the same
# replicates, plus a significance marker per ratio.
# Every remaining column of benchmarks2 is summarised across the rows (fit_seed
# replicates) of each (s, g, n, fit_type, fit_s_ratio, sim_seed) group; d0's
# columns are two-level: (metric, statistic over fit_seed).

# Insertion order determines the left-to-right offset of the tools at each ratio.
palette = {
    'sfacts44_cpu': 'purple',
    'sfinder': 'green',
}

d0 = (
    benchmarks2
    .groupby([
        's',
        'g',
        'n',
        'fit_type',
        'fit_s_ratio',
        'sim_seed',
    ], as_index=False)
    .agg(['mean', 'min', 'max', 'std', 'count', 'median'])
    .reset_index()
)

g = 250
s = 40

fit_type_list = list(palette.keys())

d1 = d0.reset_index()[lambda x: (x.s==s) & (x.fit_type.isin(fit_type_list)) & (x.g == g)]

# (metric column, y-axis label, y-scale keywords, y-limit keywords)
metric_list = [
    ("unifrac_cis_error", 'Unifrac Error', dict(value='symlog', linthresh=1e-2, linscale=0.1), dict(bottom=-1e-2, top=1e0)),
    ("community_entropy_error", 'Community Entropy Error', dict(value='symlog', linthresh=1e-2, linscale=0.1), dict(bottom=-1e-2, top=1e0)),
    ("fwd_discrete_genotype_error", 'Forward Genotype Error', dict(value='symlog', linthresh=1e-4, linscale=0.1), dict(bottom=-1e-4, top=1e0)),
    ("rev_discrete_genotype_error", 'Reverse Genotype Error', dict(value='symlog', linthresh=1e-4, linscale=0.1), dict(bottom=-1e-4, top=1e0)),
]

fig, axs = plt.subplots(1, len(metric_list), figsize=(3 * len(metric_list), 4), sharex=True)

for (met, axis_label, scale_kws, ylim_kws), ax in zip(metric_list, axs):
    ax.set_ylabel(axis_label)
    ax.set_yscale(**scale_kws)
    ax.set_ylim(**ylim_kws)
    ax.set_xticks([0.5, 0.8, 1, 1.5])
    ax.set_xticklabels(['0.5x', '0.8x', '1.0x', '1.5x'])
    ax.set_xlabel('strains')
    # Group by the scalar label so the key is a plain number on every pandas
    # version (a one-element list grouper yields 1-tuple keys on newer pandas).
    for fit_s_ratio, d2 in d1.groupby('fit_s_ratio'):
        # Compare the tools on the per-sim_seed minimum of the metric.
        # mwu[1] is used as the p-value -- presumably the helper returns
        # (statistic, pvalue); confirm against lib.stats.
        mwu = lib.stats.mannwhitneyu('fit_type', (met, 'min'), data=d2.sort_values('sim_seed'))
        marker_x = d2['fit_s_ratio'].values[0] + 0.04
        print(met, fit_s_ratio, mwu, marker_x)
        ax.annotate(
            pvalue_to_significance_marker(mwu[1]),
            xy=(marker_x, 0.87),  # data coordinates
            ha='center', va='top', fontsize=13)
        for fit_type, offset in zip(fit_type_list, np.linspace(-1, 1, num=len(fit_type_list))):
            d3 = d2[d2.fit_type == fit_type].sort_values((met, 'min'))
            if d3.empty:
                continue
            # Deterministic "jitter": points are spread evenly from -1 to 1 in
            # order of increasing metric value.
            d3 = d3.assign(jitter=np.linspace(-1, 1, num=d3.shape[0]))
            x_pos = d3['fit_s_ratio'] + d3['jitter'] * 0.07 + offset * 0.1
            ax.scatter(x_pos, d3[(met, 'min')], edgecolor='lightgrey', color=palette[fit_type], alpha=0.7)
            ax.vlines(x_pos, d3[(met, 'min')], d3[(met, 'median')], color=palette[fit_type], lw=1, alpha=0.7)

fig.tight_layout(w_pad=2.)

In [None]:
# Accuracy of every individual fit (no aggregation over fit_seed or sim_seed)
# versus fitted-strain ratio for s=40, g=250, with a significance marker per ratio.

# Insertion order determines the left-to-right offset of the tools at each ratio.
palette = {
    'sfacts44_cpu': 'tab:blue',
    'sfinder': 'tab:green',
}

# .assign returns a new frame, so the jitter column is NOT added to benchmarks2
# itself (earlier cells that aggregate all of its columns stay re-runnable).
# Jitter is uniform in [-1, 1); the seeded generator keeps the figure reproducible.
d0 = benchmarks2.assign(
    jitter=np.random.default_rng(0).random(benchmarks2.shape[0]) * 2 - 1
)

g = 250
s = 40

fit_type_list = list(palette.keys())

d1 = d0.reset_index()[lambda x: (x.s==s) & (x.fit_type.isin(fit_type_list)) & (x.g == g)]

# (metric column, panel title, y-axis label, y-scale keywords, y-limit keywords)
metric_list = [
    ("unifrac_cis_error", 'Unifrac Distance', 'mean distance', dict(value='symlog', linthresh=1e-3, linscale=0.1), dict(bottom=-1e-4, top=2e0)),
    ("braycurtis_trans_error", "Pairwise Bray-Curtis", 'mean absolute error', dict(value='symlog', linthresh=1e-3, linscale=0.1), dict(bottom=-1e-4, top=2e0)),
    ("community_entropy_error", 'Compositional Entropy', 'mean absolute error', dict(value='symlog', linthresh=1e-3, linscale=0.1), dict(bottom=-1e-4, top=2e0)),
    ("fwd_discrete_genotype_error", 'Best Match to True Genotype', 'weighted mean distance', dict(value='symlog', linthresh=1e-3, linscale=0.1), dict(bottom=-1e-4, top=2e0)),
    ("rev_discrete_genotype_error", 'Best Match to Inferred Genotype', 'weighted mean distance', dict(value='symlog', linthresh=1e-3, linscale=0.1), dict(bottom=-1e-4, top=2e0)),
]

ncol = 3
nrow = int(np.ceil(len(metric_list) / ncol))

fig, axs = plt.subplots(nrow, ncol, figsize=(3.6 * ncol, 4 * nrow), sharex=True)

for (met, title, axis_label, scale_kws, ylim_kws), ax in zip(metric_list, axs.flatten()):
    ax.set_title(title)
    ax.set_ylabel(axis_label)
    ax.set_yscale(**scale_kws)
    ax.set_ylim(**ylim_kws)
    ax.set_xticks([0.5, 0.8, 1, 1.5])
    ax.set_xticklabels(['0.5x', '0.8x', '1.0x', '1.5x'])
    ax.set_xlabel('strains')
    # Group by the scalar label so the key is a plain number on every pandas
    # version (a one-element list grouper yields 1-tuple keys on newer pandas).
    for fit_s_ratio, d2 in d1.groupby('fit_s_ratio'):
        # Rows are sorted by (sim_seed, fit_seed) before the test -- presumably so
        # the helper can pair observations between the two tools; confirm against
        # lib.stats.  mwu[1] is used as the p-value.
        mwu = lib.stats.wilcoxon('fit_type', met, data=d2.sort_values(['sim_seed', 'fit_seed']))
        print(met, fit_s_ratio, mwu, )
        ax.annotate(
            pvalue_to_significance_marker(mwu[1]),
            xy=(
                d2['fit_s_ratio'].values[0],
                0.87
            ), ha='center', va='top', fontsize=13)
        for fit_type, offset in zip(fit_type_list, np.linspace(-1, 1, num=len(fit_type_list))):
            d3 = d2[d2.fit_type == fit_type]
            print(d3[met].median())
            if d3.empty:
                continue
            ax.scatter(
                d3['fit_s_ratio'] + d3['jitter'] * 0.04 + offset * 0.07,
                d3[met],
                edgecolor='lightgrey',
                color=palette[fit_type],
                alpha=0.7,
            )

# Five metrics on a 2 x 3 grid leave the last panel unused; hide it.
axs[-1, -1].axis('off')

    
fig.tight_layout(w_pad=2., h_pad=3.)

In [None]:
# Accuracy benchmark figure at s=40, g=250 for the tools listed in `palette`.
# Every individual fit is drawn as one point (no aggregation over seeds);
# tools are compared within each fit_s_ratio group with lib.stats.mannwhitneyu.

palette = {
    'sfacts44_cpu': 'tab:blue',
    'sfinder': 'tab:green',
}

jitter_rng = np.random.default_rng(0)  # fixed seed so the saved PDF is reproducible
d0 = (
    benchmarks2
    [lambda x: x.fit_s_ratio.isin([0.8, 1.0, 1.5])]
    .assign(
        # Evenly spaced x positions: the 0.8x group is drawn at x=0.5.
        xpos=lambda x: x.fit_s_ratio.map({0.8: 0.5, 1.0: 1.0, 1.5: 1.5}),
        # Horizontal jitter in [-1, 1), scaled when plotted.
        jitter=lambda x: jitter_rng.uniform(-1, 1, size=x.shape[0]),
    )
)

g = 250
s = 40

fit_type_list = list(palette.keys())

d1 = d0.reset_index()[lambda x: (x.s == s) & (x.fit_type.isin(fit_type_list)) & (x.g == g)]

# All panels share the same symlog y-scale and limits.
scale_kws_shared = dict(value='symlog', linthresh=1e-4, linscale=0.1)
ylim_kws_shared = dict(bottom=-1e-5, top=2e0)

# (column, panel title, y-axis label, yscale kwargs, ylim kwargs)
# Metrics previously listed here but disabled: metagenotype_prediction_error,
# unifrac_trans_error, rank_abundance_error, fwd_genotype_error,
# rev_genotype_error, runtime.
metric_list = [
    ("unifrac_cis_error", 'Unifrac Distance', 'mean distance', scale_kws_shared, ylim_kws_shared),
    ("braycurtis_trans_error", "Pairwise Bray-Curtis", 'mean absolute error', scale_kws_shared, ylim_kws_shared),
    ("community_entropy_error", 'Compositional Entropy', 'mean absolute error', scale_kws_shared, ylim_kws_shared),
    ("fwd_discrete_genotype_error", 'Best Match to True Genotype', 'weighted mean distance', scale_kws_shared, ylim_kws_shared),
    ("rev_discrete_genotype_error", 'Best Match to Inferred Genotype', 'weighted mean distance', scale_kws_shared, ylim_kws_shared),
]

ncol = 3
nrow = int(np.ceil(len(metric_list) / ncol))

fig, axs = plt.subplots(nrow, ncol, figsize=(3.6 * ncol, 4 * nrow), sharex=True)

for panel_letter, (met, title, axis_label, scale_kws, ylim_kws), ax in zip("ABCDE", metric_list, axs.flatten()):
    ax.set_title(title)
    ax.set_ylabel(axis_label)
    ax.set_yscale(**scale_kws)
    ax.set_ylim(**ylim_kws)
    ax.set_xticks([0.5, 1, 1.5])
    ax.set_xticklabels(['0.8x', '1.0x', '1.5x'])
    ax.set_xlabel('strains')
    ax.annotate(panel_letter, xy=(-0.1, 1.05), xycoords='axes fraction', fontsize=14, fontweight='bold')
    # Group by a scalar key (not a one-element list) so that fit_s_ratio is a
    # plain number on every pandas version rather than a 1-tuple.
    for fit_s_ratio, d2 in d1.groupby('fit_s_ratio'):
        # mixtureS rows (if any) are excluded from the two-group test.
        stat_data = d2.drop(idxwhere(d2.fit_type == 'mixtureS'))
        mwu = lib.stats.mannwhitneyu('fit_type', met, data=stat_data.sort_values(['sim_seed', 'fit_seed']))
        print(met, fit_s_ratio, mwu)
        # Significance marker above the group, in data coordinates.
        ax.annotate(
            pvalue_to_significance_marker(mwu[1]),
            xy=(d2['xpos'].values[0], 1.5),
            ha='center', va='top', fontsize=13)
        for fit_type, offset in zip(fit_type_list, np.linspace(-1, 1, num=len(fit_type_list))):
            d3 = d2[d2.fit_type == fit_type]
            if d3.empty:
                continue
            print(d3[met].median())
            ax.scatter(
                d3['xpos'] + d3['jitter'] * 0.04 + offset * 0.07,
                d3[met],
                edgecolor='lightgrey', color=palette[fit_type], alpha=0.7)

# Five metrics on a 2x3 grid: hide the unused last panel.
axs[-1, -1].axis('off')

fig.tight_layout(w_pad=2., h_pad=3.)
fig.savefig('fig/accuracy_benchmarking.pdf', dpi=400)

In [None]:
# Stand-alone legend for the accuracy figure. Empty scatter artists supply the
# legend handles and the axes frame is hidden, so only the legend is saved.
# Relies on `palette` as defined by the accuracy figure cell.
legend_entries = {'sfacts44_cpu': 'StrainFacts', 'sfinder': 'Strain Finder'}

fig, ax = plt.subplots(figsize=(2.0, 1.0))
for fit_type, fit_name in legend_entries.items():
    ax.scatter([], [], s=100, label=fit_name, color=palette[fit_type], edgecolor='lightgrey')
ax.legend(loc='upper left')
ax.axis('off')
fig.savefig('fig/accuracy_benchmarking_legend.pdf', dpi=400)

In [None]:
# Accuracy benchmark figure including MixtureS, at s=40, g=250.
# Every individual fit is drawn as one point (no aggregation over seeds);
# no significance test is run in this version of the figure.

palette = {
    'sfacts44_cpu': 'tab:blue',
    'sfinder': 'tab:green',
    'mixtureS': 'tab:cyan',
}

jitter_rng = np.random.default_rng(0)  # fixed seed so the saved PDF is reproducible
d0 = (
    benchmarks2
    [lambda x: x.fit_s_ratio.isin([0.8, 1.0, 1.5])]
    .assign(
        # Evenly spaced x positions: the 0.8x group is drawn at x=0.5.
        xpos=lambda x: x.fit_s_ratio.map({0.8: 0.5, 1.0: 1.0, 1.5: 1.5}),
        # Horizontal jitter in [-1, 1), scaled when plotted.
        jitter=lambda x: jitter_rng.uniform(-1, 1, size=x.shape[0]),
    )
)

g = 250
s = 40

fit_type_list = list(palette.keys())

d1 = d0.reset_index()[lambda x: (x.s == s) & (x.fit_type.isin(fit_type_list)) & (x.g == g)]

# All panels share the same symlog y-scale and limits.
scale_kws_shared = dict(value='symlog', linthresh=1e-4, linscale=0.1)
ylim_kws_shared = dict(bottom=-1e-5, top=2e0)

# (column, panel title, y-axis label, yscale kwargs, ylim kwargs)
# Metrics previously listed here but disabled: metagenotype_prediction_error,
# unifrac_trans_error, braycurtis_trans_error, rank_abundance_error,
# fwd_genotype_error, rev_genotype_error, runtime.
metric_list = [
    ("unifrac_cis_error", 'Unifrac Distance', 'mean distance', scale_kws_shared, ylim_kws_shared),
    ("community_entropy_error", 'Compositional Entropy', 'mean absolute error', scale_kws_shared, ylim_kws_shared),
    ("fwd_discrete_genotype_error", 'Best Match to True Genotype', 'weighted mean distance', scale_kws_shared, ylim_kws_shared),
    ("rev_discrete_genotype_error", 'Best Match to Inferred Genotype', 'weighted mean distance', scale_kws_shared, ylim_kws_shared),
]

ncol = 2
nrow = int(np.ceil(len(metric_list) / ncol))

fig, axs = plt.subplots(nrow, ncol, figsize=(3.6 * ncol, 4 * nrow), sharex=True)

for panel_letter, (met, title, axis_label, scale_kws, ylim_kws), ax in zip("ABCDE", metric_list, axs.flatten()):
    ax.set_title(title)
    ax.set_ylabel(axis_label)
    ax.set_yscale(**scale_kws)
    ax.set_ylim(**ylim_kws)
    ax.set_xticks([0.5, 1, 1.5])
    ax.set_xticklabels(['0.8x', '1.0x', '1.5x'])
    ax.set_xlabel('strains')
    ax.annotate(panel_letter, xy=(-0.1, 1.05), xycoords='axes fraction', fontsize=14, fontweight='bold')
    # Group by a scalar key (not a one-element list) so that fit_s_ratio is a
    # plain number on every pandas version rather than a 1-tuple.
    for fit_s_ratio, d2 in d1.groupby('fit_s_ratio'):
        for fit_type, offset in zip(fit_type_list, np.linspace(-1, 1, num=len(fit_type_list))):
            d3 = d2[d2.fit_type == fit_type]
            if d3.empty:
                continue
            print(d3[met].median())
            # Wider per-tool offset (0.15) than the two-tool figure, since
            # three tools share each group.
            ax.scatter(
                d3['xpos'] + d3['jitter'] * 0.04 + offset * 0.15,
                d3[met],
                edgecolor='lightgrey', color=palette[fit_type], alpha=0.7)

fig.tight_layout(w_pad=2., h_pad=3.)
fig.savefig('fig/accuracy_benchmarking_with_mixtureS.pdf', dpi=400)

In [None]:
# Stand-alone legend for the MixtureS accuracy figure. Empty scatter artists
# supply the handles and the axes frame is hidden, so only the legend is saved.
# Relies on `palette` as defined by the MixtureS figure cell.
legend_entries = {
    'sfacts44_cpu': 'StrainFacts',
    'sfinder': 'Strain Finder',
    'mixtureS': 'MixtureS',
}

fig, ax = plt.subplots(figsize=(2.0, 1.0))
for fit_type, fit_name in legend_entries.items():
    ax.scatter([], [], s=100, label=fit_name, color=palette[fit_type], edgecolor='lightgrey')
ax.legend(loc='upper left')
ax.axis('off')
fig.savefig('fig/accuracy_benchmarking_with_mixtureS_legend.pdf', dpi=400)

In [None]:
# Collect the peak GPU memory reading of every sfacts44_gpumem fit.
# Each `.gpumem` log is read as one integer reading per non-blank line and the
# maximum is kept. Combinations without a log file, or with an empty log, are
# skipped.
# NOTE(review): units of the readings are not visible here; the downstream
# plot labels them "Mb" - confirm against whatever writes the logs.
gpumem_columns = ['path', 'n', 's', 'g', 'sim_seed', 'fit_seed', 'max_vmem']

_benchmarks = []
for n, g, s, sim_seed, fit_seed in tqdm(list(product(
    [100, 200, 500, 1000, 2500, 10000],
    [250, 500, 1000],
    [20, 40, 100, 200, 400],
    range(5),
    range(5),
))):
    sim_prefix = f"data/sfacts_simulate-model_simplest_simulation-n{n}-g{g}-s{s}-pi40-mu100-eps10-seed{sim_seed}"
    mgen_prefix = f"{sim_prefix}.metagenotype-n{n}-g{g}"
    eval_path = f"{mgen_prefix}.fit-sfacts44_gpumem-s{s}-seed{fit_seed}.gpumem"
    try:
        with open(eval_path) as f:
            # Ignore blank lines (e.g. a trailing newline) instead of failing in int().
            readings = [int(line) for line in f if line.strip()]
    except FileNotFoundError:
        continue
    if not readings:
        # Empty log: nothing to take a maximum of.
        continue
    _benchmarks.append(dict(
        path=eval_path, n=n, s=s, g=g, sim_seed=sim_seed, fit_seed=fit_seed, max_vmem=max(readings),
    ))

# Explicit columns keep set_index('path') working even when no log was found.
_benchmarks = pd.DataFrame(_benchmarks, columns=gpumem_columns).set_index('path')
print(_benchmarks.shape)
benchmarks3 = _benchmarks

In [None]:
# Median peak GPU memory over seeds for every (n, g, s), drawn against the
# number of samples: colour encodes s, line style encodes g.
d0 = (
    benchmarks3
    .groupby(['n', 'g', 's'])['max_vmem']
    .median()
    .reset_index()
    .assign(n_to_s_ratio=lambda df: df.n / df.s)
)
ordered_s_list = [20, 40, 100, 200, 400]
# NOTE(review): the colormap is called with the raw integer s values, which
# matplotlib treats as lookup-table indices; 400 lies beyond a 256-entry table.
# Confirm this colour spacing is intended.
palette = dict(zip(ordered_s_list, mpl.cm.cool(ordered_s_list)))
ls_map = {250: ':', 500: '--', 1000: '-'}

# One unlabelled curve per (g, s) combination.
for (g, s), d1 in d0.groupby(['g', 's']):
    curve = d1.sort_values('n')
    plt.plot('n', 'max_vmem', data=curve, c=palette[s], ls=ls_map[g], label='__nolegend__')

# Empty proxy lines provide one legend entry per s colour and per g line style.
for s in ordered_s_list:
    plt.plot([], [], c=palette[s], ls='-', label=f's={s}')
for g in d0.g.unique():
    plt.plot([], [], c='black', ls=ls_map[g], label=f'g={g}')
plt.legend(loc='upper left')

plt.yscale('log')
plt.xscale('log')
plt.ylabel('Peak GPU Memory Allocation (Mb)')
plt.xlabel('samples')
# plt.ylim(4e2, 5e3)

In [None]:
# data/sfacts_simulate-model_simplest_simulation-n100-g500-s400-pi40-mu100-eps10-seed0.metagenotype-n100-g500.fit-sfacts44_timeit-s400-g1000000-seed2.world.nc

# Collect the peak resident set size of every `*_timeit` fit from its `.time`
# log: the integer before "maxresident)k" on the first line is extracted.
# Missing and empty logs are skipped.

import re

# Raw string: in a normal literal `\)` is an invalid escape sequence, which
# newer Python versions warn about.
maxrss_pattern = re.compile(r'.* ([0-9]+)maxresident\)k')
time_columns = ['path', 'n', 'g', 's', 'fit_seed', 'sim_seed', 'fit_type', 'maxrss']

_benchmarks = []
for n, g, s, fit_seed, sim_seed, fit_type in tqdm(list(product(
    [100, 200, 500, 1000, 2500, 10000],
    [250, 500, 1000],
    [20, 40, 100, 200, 400],
    range(3),
    range(3),
    ['sfacts44', 'sfinder']
))):
    sim_prefix = f"data/sfacts_simulate-model_simplest_simulation-n{n}-g{g}-s{s}-pi40-mu100-eps10-seed{sim_seed}"
    mgen_prefix = f"{sim_prefix}.metagenotype-n{n}-g{g}"
    eval_path = f"{mgen_prefix}.fit-{fit_type}_timeit-s{s}-seed{fit_seed}.time"
    try:
        with open(eval_path) as f:
            d = f.readlines()
    except FileNotFoundError:
        continue
    if len(d) == 0:
        continue
    maxrss_match = maxrss_pattern.match(d[0])
    if maxrss_match is None:
        # Fail with the offending file instead of an opaque
        # "'NoneType' object is not subscriptable".
        raise ValueError(f"Could not parse maxresident from first line of {eval_path}: {d[0]!r}")
    maxrss = int(maxrss_match[1])
    meta_dict = dict(
        path=eval_path,
        n=n,
        g=g,
        s=s,
        fit_seed=fit_seed,
        sim_seed=sim_seed,
        fit_type=fit_type,
        maxrss=maxrss,
    )
    _benchmarks.append(meta_dict)

# Explicit columns keep set_index('path') working even when no log was found.
_benchmarks = pd.DataFrame(_benchmarks, columns=time_columns).set_index('path')
print(_benchmarks.shape)
benchmarks4 = _benchmarks

In [None]:
# Median maxrss of the sfacts44 fits for every (n, g, s) combination.
benchmarks4[benchmarks4.fit_type == 'sfacts44'].groupby(['n', 'g', 's'])['maxrss'].median()

In [None]:
# Number of sfacts44 runs available for every (n, g, s) combination.
# .size() replaces .apply(len): same counts without a Python call per group.
benchmarks4.groupby(['n', 'g', 's', 'fit_type']).size().xs('sfacts44', level='fit_type')

In [None]:
# Number of loaded runs for each value of s.
benchmarks4.s.value_counts()

In [None]:
import statsmodels.formula.api as smf

# Ordinary least squares model of peak memory for the sfacts44 fits, using an
# intercept plus the three pairwise interaction terms n:g, n:s and g:s.

# Runs at n=10000, g=1000 are excluded (author's note: "These must have gotten swapped.").
swapped_runs = benchmarks4[lambda x: (x.n == 10000) & (x.g == 1000)].index
d0 = (
    benchmarks4
    .drop(swapped_runs)
    .groupby(['n', 'g', 's', 'fit_type', 'sim_seed', 'fit_seed'])['maxrss']
    .max()
    .reset_index()
    .assign(
        n_to_s_ratio=lambda df: df.n / df.s,
        maxrss_mb=lambda df: df.maxrss / 1e3,
    )
)

sfacts44_runs = d0[d0.fit_type == 'sfacts44']
fit = smf.ols('maxrss ~ n:g + n:s + g:s', data=sfacts44_runs).fit()
fit.summary()

In [None]:
# How many (n, g) combinations with sfacts44 data exist for each value of s.
# .size() replaces .apply(len): same counts without a Python call per group.
d0[lambda x: (x.fit_type=='sfacts44')].groupby(['n', 'g', 's']).size().reset_index().s.value_counts()

In [None]:
# sfacts44 run counts per (n, g, s) after excluding the n=10000, g=1000 runs.
# .size() replaces .apply(len): same counts without a Python call per group.
benchmarks4.drop(benchmarks4[lambda x: (x.n == 10000) & (x.g==1000)].index)[lambda x: (x.fit_type=='sfacts44')].groupby(['n', 'g', 's']).size()

In [None]:
import statsmodels.formula.api as smf

# Robust linear model (smf.rlm) of peak memory for the sfacts44 fits, with the
# same formula as the OLS fit: intercept plus n:g, n:s and g:s.

# Runs at n=10000, g=1000 are excluded (author's note: "These must have gotten swapped.").
swapped_runs = benchmarks4[lambda x: (x.n == 10000) & (x.g == 1000)].index
d0 = (
    benchmarks4
    .drop(swapped_runs)
    .groupby(['n', 'g', 's', 'fit_type', 'sim_seed', 'fit_seed'])['maxrss']
    .max()
    .reset_index()
    .assign(
        n_to_s_ratio=lambda df: df.n / df.s,
        maxrss_mb=lambda df: df.maxrss / 1e3,
    )
)

sfacts44_runs = d0[d0.fit_type == 'sfacts44']
fit2 = smf.rlm('maxrss ~ n:g + n:s + g:s', data=sfacts44_runs).fit()
fit2.summary()

In [None]:
# Predict memory with the robust fit (fit2) at g=1000, s=400 on a log-spaced
# grid of n from 100 to 50,000; predictions are divided by 1,000 like maxrss_mb.
n_grid = np.logspace(np.log10(100), np.log10(50_000))
d0_extrapolate = pd.DataFrame({'g': 1000, 's': 400, 'n': n_grid})
d0_extrapolate['predict_mb'] = fit2.predict(d0_extrapolate) / 1_000

# Last grid row whose predicted memory is still below 32,000.
d0_extrapolate.loc[d0_extrapolate.predict_mb < 32_000].iloc[-1]

In [None]:
# Median peak memory per (n, g, s, fit_type) against the number of samples on
# linear axes: colour encodes s, line style encodes g. The red curve is the
# prediction stored in d0_extrapolate.

# Runs at n=10000, g=1000 are excluded (author's note: "These must have gotten swapped.").
swapped_runs = benchmarks4[lambda x: (x.n == 10000) & (x.g == 1000)].index
d0 = (
    benchmarks4
    .drop(swapped_runs)
    .groupby(['n', 'g', 's', 'fit_type'])['maxrss']
    .median()
    .reset_index()
    .assign(
        n_to_s_ratio=lambda df: df.n / df.s,
        maxrss_mb=lambda df: df.maxrss / 1e3,
    )
)
ordered_s_list = [20, 40, 100, 200, 400]

ls_map = {250: ':', 500: '--', 1000: '-'}

fig, ax = plt.subplots()
for fit_type, cm in zip(['sfacts44'], [mpl.cm.Blues]):
    # NOTE(review): the colormap is called with the raw integer s values, which
    # matplotlib treats as lookup-table indices - confirm this is intended.
    palette = dict(zip(ordered_s_list, cm(ordered_s_list)))
    fit_rows = d0[d0.fit_type == fit_type].sort_values('n')
    for (s, g), d1 in fit_rows.groupby(['s', 'g']):
        ax.plot('n', 'maxrss_mb', data=d1, c=palette[s], ls=ls_map[g], lw=2, label='__nolegend__')
    # Empty proxy lines: one legend entry per s colour.
    for s in d0.s.sort_values().unique():
        ax.plot([], [], c=palette[s], ls='-', label=f's={s}')

# Empty proxy lines: one legend entry per g line style.
for g in [250, 500, 1000]:
    ax.plot([], [], color='grey', ls=ls_map[g], label=f'g={g}')

ax.plot('n', 'predict_mb', data=d0_extrapolate, color='red', ls='dashdot', lw=1, label='predicted')

ax.legend(loc='upper left', ncol=1)

# ax.set_yscale('log')
# ax.set_xscale('log')
ax.set_xlim(0, 1.2e4)
ax.set_ylim(0, 1.0e4)
ax.set_ylabel('Peak Memory Allocation (Mb)')
ax.set_xlabel('samples')
# ax.set_ylim(5e1, 1e4)
# fig.tight_layout()

In [None]:
# Predict memory with the OLS fit (fit) at g=1000, s=100 on a log-spaced grid
# of n from 100 to 50,000; predictions are divided by 1,000 like maxrss_mb.
n_grid = np.logspace(np.log10(100), np.log10(50_000))
extrapolate2 = pd.DataFrame({'g': 1000, 's': 100, 'n': n_grid})
extrapolate2['predict_mb'] = fit.predict(extrapolate2) / 1_000

# Last grid row whose predicted memory is still below 32,000.
extrapolate2.loc[extrapolate2.predict_mb < 32_000].iloc[-1]

In [None]:
# Predict memory with the robust fit (fit2) at g=1000, s=100 on a log-spaced
# grid of n from 100 to 50,000; predictions are divided by 1,000 like maxrss_mb.
extrapolate3 = pd.DataFrame(dict(g=1000, s=100, n=np.logspace(np.log10(100), np.log10(50_000))))
# Predict on extrapolate3 itself. The original passed extrapolate2 (copy-paste
# slip), which only worked because both frames share the same g, s and n grid.
extrapolate3['predict_mb'] = fit2.predict(extrapolate3) / 1_000

# Last grid row whose predicted memory is still below 32,000.
extrapolate3[extrapolate3.predict_mb < 32_000].iloc[-1]

In [None]:
# Memory profiling figure: median peak memory of the s=100 fits against the
# number of samples, one line per g, with the prediction stored in
# extrapolate2 overlaid in red.
d0 = (
    benchmarks4
    [lambda x: x.s == 100]
    [lambda x: ~((x.n == 10000) & (x.g==1000))]  # These must have gotten RAM swapped.
    .groupby(['n', 'g', 's', 'fit_type'])
    .maxrss
    .median()
    .reset_index()
    .assign(
        n_to_s_ratio=lambda x: x.n / x.s,
        maxrss_mb=lambda x: x.maxrss / 1e3
    )
)
ordered_g_list = [250, 500, 1000]

fig, ax = plt.subplots()
for fit_type, cm in zip(['sfacts44'], [mpl.cm.Blues]):
    # Sample one more colour than needed and drop the first (lightest) one.
    g_colors = cm(np.linspace(0, 1, num=len(ordered_g_list) + 1))[1:]
    palette = dict(zip(ordered_g_list, g_colors))
    # Group by the scalar key 'g'. With the one-element list ['g'], pandas >= 2
    # yields 1-tuples such as (250,), which broke the palette[g] lookup and
    # rendered labels like "G=(250,)".
    for g, d1 in d0[d0.fit_type == fit_type].sort_values(['n', 'g']).groupby('g'):
        ax.plot('n', 'maxrss_mb', marker='.', c=palette[g], markersize=12, data=d1, lw=1, ls='-', label=f'G={g}')

# Drawn on the explicit axes rather than through the pyplot state machine.
ax.plot('n', 'predict_mb', data=extrapolate2, color='red', lw=2, ls='-', label='predicted', alpha=0.7)

ax.legend(loc='upper left', ncol=1)

ax.set_yscale('log')
ax.set_xscale('log')
ax.set_ylabel('peak memory allocation (Mb)')
ax.set_xlabel('samples (N)')

fig.savefig('fig/memory_profiling.pdf', dpi=400)

In [None]:
# Median peak memory per (n, g, s, fit_type) against the number of samples on
# log-log axes: colour encodes S, line style encodes G. The red curve is the
# prediction stored in d0_extrapolate. Saved to doc/static.

# Runs at n=10000, g=1000 are excluded (author's note: "These must have gotten swapped.").
swapped_runs = benchmarks4[lambda x: (x.n == 10000) & (x.g == 1000)].index
d0 = (
    benchmarks4
    .drop(swapped_runs)
    .groupby(['n', 'g', 's', 'fit_type'])['maxrss']
    .median()
    .reset_index()
    .assign(
        n_to_s_ratio=lambda df: df.n / df.s,
        maxrss_mb=lambda df: df.maxrss / 1e3,
    )
)
ordered_s_list = [20, 40, 100, 200, 400]

ls_map = {250: ':', 500: '--', 1000: '-'}

fig, ax = plt.subplots()
for fit_type, cm in zip(['sfacts44'], [mpl.cm.Blues]):
    # NOTE(review): the colormap is called with the raw integer s values, which
    # matplotlib treats as lookup-table indices - confirm this is intended.
    palette = dict(zip(ordered_s_list, cm(ordered_s_list)))
    fit_rows = d0[d0.fit_type == fit_type].sort_values('n')
    for (s, g), d1 in fit_rows.groupby(['s', 'g']):
        ax.plot('n', 'maxrss_mb', data=d1, c=palette[s], ls=ls_map[g], lw=2, label='__nolegend__')
    # Empty proxy lines: one legend entry per S colour.
    for s in d0.s.sort_values().unique():
        ax.plot([], [], c=palette[s], ls='-', label=f'S={s}')

# Empty proxy lines: one legend entry per G line style.
for g in [250, 500, 1000]:
    ax.plot([], [], color='grey', ls=ls_map[g], label=f'G={g}')

ax.plot('n', 'predict_mb', data=d0_extrapolate, color='red', ls='dashdot', lw=1, label='predicted')

ax.legend(loc='upper left', ncol=1)

ax.set_yscale('log')
ax.set_xscale('log')
ax.set_ylabel('Peak Memory Allocation (Mb)')
ax.set_xlabel('samples (N)')
# ax.set_ylim(5e1, 1e4)
# fig.tight_layout()

fig.savefig('doc/static/memory_profiling_more_strains_figure.pdf', dpi=400)