## Preamble

In [None]:
# Load IPython's autoreload extension so that the `%autoreload` magic is available.
# NOTE(review): no `%autoreload <mode>` call is visible in this notebook — confirm
# whether a reload mode is selected elsewhere.
%load_ext autoreload

In [None]:
import os as _os

# Work from the project root so the relative paths used throughout the
# notebook ("meta/...", "data/...", "ref/...") resolve.
_os.chdir(_os.environ["PROJECT_ROOT"])
# Echo the resolved working directory as the cell output.
_os.path.realpath(_os.curdir)

### Imports

In [None]:
import os
import subprocess
import time
from itertools import chain, product
from tempfile import mkstemp
from warnings import filterwarnings

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy as sp
import seaborn as sns
import sfacts as sf
import statsmodels.formula.api as smf
import xarray as xr
from mpl_toolkits.axes_grid1 import make_axes_locatable

# from fastcluster import linkage
from scipy.cluster.hierarchy import linkage
from scipy.spatial.distance import pdist, squareform
from statsmodels.graphics.regressionplots import influence_plot
from statsmodels.stats.multitest import fdrcorrection
from tqdm import tqdm

import lib.plot
import lib.thisproject.data
from lib.pandas_util import align_indexes, aligned_index, idxwhere, invert_mapping

In [None]:
# Global plot styling: seaborn "paper" context and a figure DPI of 50.
sns.set_context("paper")
plt.rcParams.update({"figure.dpi": 50})

In [None]:
def _calculate_2tailed_pvalue_from_perm(obs, perms):
    hypoth_left = perms > obs
    hypoth_right = perms < obs
    null_p_left = (hypoth_left.sum() + 1) / (len(hypoth_left) + 1)
    null_p_right = (hypoth_right.sum() + 1) / (len(hypoth_right) + 1)
    return np.minimum(null_p_left, null_p_right) * 2

In [None]:
def linkage_order(linkage, labels):
    """Return `labels` reordered to follow the leaves of a clustering tree.

    `linkage` is a SciPy linkage matrix; its tree is walked in pre-order and
    the resulting leaf ids are used to index into `labels`.
    """
    root = sp.cluster.hierarchy.to_tree(linkage)
    leaf_ids = root.pre_order(lambda node: node.id)
    return labels[leaf_ids]


def is_prime(n):
    """Return True if `n` is prime, using trial division up to sqrt(n)."""
    if n <= 1:
        return False
    limit = int(n**0.5) + 1
    return all(n % divisor != 0 for divisor in range(2, limit))


def iterate_primes_up_to(n, return_index=False):
    """Yield the primes strictly below ``ceil(n)`` in increasing order.

    When `return_index` is true, yield ``(index, prime)`` pairs instead, where
    `index` counts the primes from zero.
    """
    upper = int(np.ceil(n))
    primes = (candidate for candidate in range(upper) if is_prime(candidate))
    for position, prime in enumerate(primes):
        yield (position, prime) if return_index else prime


def maximally_shuffled_order(sorted_order):
    """Return the items of `sorted_order` in a deterministically shuffled order.

    Each item's original position is reduced modulo every prime yielded by
    ``iterate_primes_up_to(sqrt(n))``.  The positions are sorted by these
    residues, and the position found at sorted rank ``i`` becomes the new
    rank of item ``i``.  The items are returned sorted by that new rank.

    Parameters
    ----------
    sorted_order
        Sequence of labels (used as a DataFrame index).

    Returns
    -------
    list
        The same labels in the shuffled order.
    """
    n = len(sorted_order)
    primes_list = list(iterate_primes_up_to(np.sqrt(n)))
    table = pd.DataFrame(np.arange(n), index=sorted_order, columns=["original_order"])
    for prime in primes_list:
        table[prime] = table.original_order % prime
    # Positional assignment: row i receives the original position that landed
    # at rank i after sorting by the residue columns.
    table = table.assign(new_order=table.sort_values(primes_list).original_order.values)
    return table.sort_values("new_order").index.to_list()

## Construct Metadata

In [None]:
# Colors keyed by sample-pair type.
pair_type_palette = dict(Transition="plum", EEN="pink", PostEEN="lightblue")

# Colors keyed by diet / medium.
diet_palette = dict(
    EEN="lightgreen",
    PostEEN="lightblue",
    InVitro="plum",
    PreEEN="lightpink",
)

# Display order of the subjects (single-letter ids).
subject_order = list("ABHCDEFGKLMNOPQRSTU")

# NOTE: Requires a dummy value because I want exactly 20 items.
subject_palette = lib.plot.construct_ordered_palette(
    [*subject_order, *(f"dummy{i}" for i in range(20 - len(subject_order)))],
    cm="tab20",
)
subject_palette["X"] = "black"

# Order, marker and line style used for each pair type.
pair_type_order = ["EEN", "Transition", "PostEEN"]
pair_type_marker_palette = dict(EEN="s", Transition=">", PostEEN="o")
pair_type_linestyle_palette = dict(EEN=":", Transition="-.", PostEEN="-")

In [None]:
# Sample metadata, indexed by sample_id.  The default `label` is the tuple
# (relative collection date, diet/medium, sample id) for each row.
sample = pd.read_table("meta/een-mgen/sample.tsv")
sample["label"] = sample[
    ["collection_date_relative_een_end", "diet_or_media", "sample_id"]
].apply(tuple, axis=1)
sample = sample.set_index("sample_id")

# Subject metadata, indexed by subject_id.
subject = pd.read_table("meta/een-mgen/subject.tsv", index_col="subject_id")

In [None]:
# zOTU count table as loaded: rows are zOTUs, columns are samples plus a
# trailing "taxonomy" column.
rotu_counts = pd.read_table(
    "data/group/een/a.proc.zotu_counts.tsv", index_col="#OTU ID"
)
rotu_counts = rotu_counts.rename_axis(index="zotu", columns="sample_id")
rotu_taxonomy = rotu_counts["taxonomy"]
# Transpose to samples-by-zOTUs and normalize each sample to relative abundance.
rotu_counts = rotu_counts.drop(columns=["taxonomy"]).T
rotu_rabund = rotu_counts.div(rotu_counts.sum(axis=1), axis=0)

# Average-linkage clustering of samples on Bray-Curtis dissimilarity.
sample_rotu_bc_linkage = sp.cluster.hierarchy.linkage(
    rotu_rabund,
    method="average",
    metric="braycurtis",
    optimal_ordering=True,
)

In [None]:
# Samples present in the zOTU table but absent from the sample metadata.
missing_samples = sorted(
    idxwhere(~rotu_counts.index.to_series().isin(sample.index))
)
print(f"{len(missing_samples)} {', '.join(missing_samples)}")

In [None]:
x = rotu_rabund
# Row annotations: subject color, plus black for the two samples involved in
# the CF_11 / CF_15 swap (grey otherwise).
row_colors = pd.DataFrame(
    {
        "subj": sample.subject_id.map(subject_palette),
        "swap": sample.index.to_series()
        .isin(["CF_11", "CF_15"])
        .map({False: "grey", True: "black"}),
    }
)
row_linkage = sample_rotu_bc_linkage

# Power-law color scaling (gamma = 1/5) on the relative abundances.
sns.clustermap(
    rotu_rabund,
    norm=mpl.colors.PowerNorm(1 / 5),
    row_colors=row_colors,
    row_linkage=row_linkage,
)

In [None]:
# Peek at the raw GT-Pro species depth table.
pd.read_table("data/group/een/r.proc.gtpro.species_depth.tsv")

In [None]:
# Long-format (sample, species_id, depth) table pivoted to samples-by-species.
gtpro_depth = pd.read_table(
    "data/group/een/r.proc.gtpro.species_depth.tsv",
    index_col=["sample", "species_id"],
)["depth"].unstack(fill_value=0)
# Species ids become strings; sample names become "CF_<n>" using the integer
# after the first underscore.
gtpro_depth = gtpro_depth.rename(
    columns=str, index=lambda x: f"CF_{int(x.split('_')[1])}"
)
gtpro_depth = gtpro_depth.rename(index={"CF_15": "CF_11", "CF_11": "CF_15"})  # Sample swap
gtpro_rabund = gtpro_depth.div(gtpro_depth.sum(axis=1), axis=0)

gtpro_rabund

In [None]:
# Headerless long-format (sample, species_id, depth) table pivoted to
# samples-by-species.
motu_depth = pd.read_table(
    "data/group/een/r.proc.gene99_new-v22-agg75.spgc_specgene-ref-t25-p95.species_depth.tsv",
    names=["sample", "species_id", "depth"],
    index_col=["sample", "species_id"],
)["depth"].unstack(fill_value=0)
# Species ids become strings; sample names become "CF_<n>" using the integer
# after the first underscore.
motu_depth = motu_depth.rename(
    columns=str, index=lambda x: f"CF_{int(x.split('_')[1])}"
)
motu_depth = motu_depth.rename(index={"CF_15": "CF_11", "CF_11": "CF_15"})  # Sample swap
motu_rabund = motu_depth.div(motu_depth.sum(axis=1), axis=0)

motu_rabund

In [None]:
# Compare sample clustering between the metagenomic (x) and zOTU (y) profiles.
x, y = align_indexes(motu_rabund, rotu_rabund)

x_linkage = linkage(x, method="average", metric="braycurtis", optimal_ordering=True)
y_linkage = linkage(y, method="average", metric="braycurtis", optimal_ordering=True)

# Annotations: subject color, plus black for the two swapped samples.
colors = pd.DataFrame(
    {
        "subj": sample.subject_id.map(subject_palette),
        "swap": sample.index.to_series()
        .isin(["CF_11", "CF_15"])
        .map({False: "grey", True: "black"}),
    }
)

# Pairwise Bray-Curtis dissimilarity of x; rows are ordered by the y
# clustering and columns by the x clustering.
x_pdist = pd.DataFrame(
    squareform(pdist(x, metric="braycurtis")),
    index=x.index,
    columns=x.index,
)
sns.clustermap(
    x_pdist,
    row_linkage=y_linkage,
    col_linkage=x_linkage,
    row_colors=colors,
    col_colors=colors,
)

In [None]:
# Compare sample clustering between the two metagenomic profiles:
# x from motu_rabund, y from gtpro_rabund.
x, y = align_indexes(motu_rabund, gtpro_rabund)

x_linkage = linkage(x, method="average", metric="braycurtis", optimal_ordering=True)
y_linkage = linkage(y, method="average", metric="braycurtis", optimal_ordering=True)

# Annotations: subject color, plus black for the two swapped samples.
colors = pd.DataFrame(
    {
        "subj": sample.subject_id.map(subject_palette),
        "swap": sample.index.to_series()
        .isin(["CF_11", "CF_15"])
        .map({False: "grey", True: "black"}),
    }
)

# Pairwise Bray-Curtis dissimilarity of x; rows are ordered by the y
# clustering and columns by the x clustering.
x_pdist = pd.DataFrame(
    squareform(pdist(x, metric="braycurtis")),
    index=x.index,
    columns=x.index,
)
sns.clustermap(
    x_pdist,
    row_linkage=y_linkage,
    col_linkage=x_linkage,
    row_colors=colors,
    col_colors=colors,
)

In [None]:
# NOTE(review): these linear bins are not used below; both histograms are
# drawn with log-spaced bins.
bins = np.linspace(0, 30_000, num=200)

fig, axs = plt.subplots(2, sharex=True)

# Total depth per sample (top) and per species (bottom), on a log x-axis.
for title, x, ax in zip(
    ("total_depth_by_sample", "total_depth_by_species"),
    (motu_depth.sum(axis=1), motu_depth.sum(axis=0)),
    axs.flatten(),
):
    ax.hist(x, bins=np.logspace(-1, 5, num=100))
    ax.set_title(title)
    ax.set_xscale("log")
fig.tight_layout()

In [None]:
# The 20 species with the highest mean relative abundance across samples.
motu_rabund.mean(axis=0).sort_values(ascending=False).iloc[:20]

In [None]:
n_species = 10
# Species detected (relative abundance > 1e-5) in the largest number of samples.
top_motus = motu_rabund.gt(1e-5).sum().sort_values(ascending=False).index[:n_species]

# One thin, borderless histogram row per species.
fig, axs = plt.subplots(
    nrows=n_species, figsize=(5, 0.3 * n_species), sharex=True, sharey=True
)

bins = np.logspace(-8, 1, num=51)

for species_id, ax in zip(top_motus, axs):
    ax.hist(motu_rabund[species_id], bins=bins, alpha=0.7)
    ax.set_xscale("log")
    # Fraction of samples in which the species exceeds the detection threshold.
    prevalence = (motu_rabund[species_id] > 1e-5).mean()
    ax.set_title("")
    # Strip all axis decoration and make the background transparent.
    ax.yaxis.set_visible(False)
    ax.xaxis.set_visible(False)
    ax.patch.set_alpha(0.0)
    for spine in ("left", "right", "top", "bottom"):
        ax.spines[spine].set_visible(False)
    ax.annotate(
        f"{species_id} ({prevalence:0.0%})",
        xy=(0.05, 0.1),
        ha="left",
        xycoords="axes fraction",
    )
    ax.set_xlim(left=1e-9)
    ax.set_ylim(top=20)
    # Mark the detection threshold.
    ax.axvline(1e-5, lw=1, linestyle=":", color="k")

# Only the last (bottom) row keeps a visible, labelled x-axis.
ax.xaxis.set_visible(True)
ax.spines["bottom"].set_visible(True)
ax.set_xticks([1e-4, 1e-2, 1e-0])
ax.set_xticklabels(["0.01%", "1%", "100%"])
ax.set_xlabel("Relative Abundance")

In [None]:
def parse_taxonomy_string(taxonomy_string):
    """Split a ';'-delimited lineage into a Series indexed by rank prefix.

    The index is ``d__, p__, c__, o__, f__, g__, s__``; the string must
    therefore contain exactly seven fields.
    """
    rank_prefixes = [f"{rank}__" for rank in "dpcofgs"]
    return pd.Series(taxonomy_string.split(";"), index=rank_prefixes)

In [None]:
motu_taxonomy_inpath = "ref/uhgg_genomes_all_v2.tsv"

# Keep only species-representative genomes; the species id is "1" followed by
# the third '-'-separated field of the MGnify accession.
_motu_taxonomy = pd.read_table(motu_taxonomy_inpath)
_motu_taxonomy = _motu_taxonomy[
    _motu_taxonomy["Genome"] == _motu_taxonomy["Species_rep"]
]
_motu_taxonomy = _motu_taxonomy.assign(
    species_id="1" + _motu_taxonomy["MGnify_accession"].str.split("-").str[2]
).set_index("species_id")

# One column per taxonomic rank.
motu_taxonomy = _motu_taxonomy["Lineage"].apply(parse_taxonomy_string)
motu_taxonomy

In [None]:
# Print the full lineage of each of the most prevalent species.
for _species_id in top_motus.astype(str):
    print(f"{_species_id} : {';'.join(motu_taxonomy.loc[_species_id].values)}")

In [None]:
def _label_experiment_sample(x):
    if x.sample_type == "human":
        label = f"[{x.name}] {x.collection_date_relative_een_end} {x.diet_or_media}"
    elif x.sample_type in ["Fermenter_inoculum"]:
        label = f"[{x.name}] {x.source_samples} inoc {x.diet_or_media}"
    elif x.sample_type in ["Fermenter"]:
        label = f"[{x.name}] {x.source_samples} frmnt {x.diet_or_media}"
    elif x.sample_type in ["mouse"]:
        if x.status_mouse_inflamed == 'Inflamed':
            label = f"[{x.name}] {x.source_samples} 🐭 {x.mouse_genotype} {x.diet_or_media} inflam"
        elif x.status_mouse_inflamed == 'not_Inflamed':
            label = f"[{x.name}] {x.source_samples} 🐭 {x.mouse_genotype} {x.diet_or_media} not_inf"
        else:
            raise ValueError(f"sample type {x.status_mouse_inflamed} not understood")
    else:
        raise ValueError(f"sample type {x.sample_type} not understood")
    return label


# Preview the experiment labels for every sample, in plotting order.
sample.sort_values(
    by=[
        "subject_id",
        "collection_date_relative_een_end",
        "source_samples",
        "sample_type",
        "diet_or_media",
    ]
).apply(_label_experiment_sample, axis=1).rename("label")

## Focal Species Plots

In [None]:
def strains_in_subjects(
    _species_rabund,
    _world,
    _meta,
    _species_rabundB=None,
    savefig=False,
    plt_mean_key=False,
    subject_order=subject_order,
    ncols=2,
    ax_width=7,
    ax_height=4,
):
    """Plot, per subject, stacked strain fractions with species abundance overlaid.

    One panel is drawn for each subject in `subject_order` that occurs in
    `_meta`.  Bars show the strain composition of every sample (x tick labels
    come from the ``label`` column of `_meta`).  A twin y-axis shows the
    species relative abundance as a line.

    Parameters
    ----------
    _species_rabund
        Per-sample relative abundance of the species; assigned as a column of
        `_meta` and drawn as a solid black line.
    _world
        Strain-deconvolution result providing ``drop_low_abundance_strains``,
        ``community``, ``genotype`` and ``strain`` (an ``sfacts`` World in
        this notebook).
    _meta
        Sample metadata; must contain ``subject_id``, ``label`` and the
        columns used for sorting below.
    _species_rabundB
        Optional second abundance estimate, drawn as a dotted blue line.
    savefig
        Falsy to skip saving; otherwise passed to ``fig.savefig``.
    plt_mean_key
        If true, also draw a second figure with the mean strain fractions of
        each subject.
    subject_order
        Panel order; subjects absent from `_meta` are skipped.
    ncols, ax_width, ax_height
        Grid layout, forwarded to ``lib.plot.subplots_grid``.
    """
    # Apply the low-abundance filter (threshold 0.05) once and reuse it for
    # both the strain fractions and the genotype-based ordering.
    _world_drop_low_abund = _world.drop_low_abundance_strains(0.05)
    _frac = _world_drop_low_abund.community.to_pandas()

    # Genotype similarity ordered palette: strains adjacent in the genotype
    # clustering receive adjacent colors; strain -1 is drawn grey.
    _strain_list = list(
        linkage_order(
            _world_drop_low_abund.genotype.linkage(optimal_ordering=True),
            _world_drop_low_abund.strain.values,
        )
    )
    strain_palette = lib.plot.construct_ordered_palette(
        _strain_list,
        cm="rainbow",
        extend={-1: 'grey'},
    )

    d0 = (
        _meta.assign(
            species_rabund=_species_rabund,
            species_rabundB=_species_rabundB,
        )
        .join(_frac)
        .sort_values(
            [
                "subject_id",
                "collection_date_relative_een_end",
                "sample_type",
                "source_samples",
                "diet_or_media",
                "mouse_genotype",
                "status_mouse_inflamed",
            ]
        )
    )
    # Shared x-extent: the largest number of samples in any subject.
    xlim = d0.subject_id.value_counts().max()

    _here_subject_list = _meta.subject_id.unique()
    _subject_order = [s for s in subject_order if s in _here_subject_list]
    fig, axs = lib.plot.subplots_grid(
        ncols=ncols,
        naxes=len(_subject_order),
        ax_width=ax_width,
        ax_height=ax_height,
    )
    for subject_id, ax in zip(_subject_order, axs.flatten()):
        ax.set_title(subject_id)
        twin_ax = ax.twinx()
        d1 = d0[lambda x: x.subject_id == subject_id].set_index("label")
        if d1.empty:
            continue
        # NOTE(review): if `_strain_list` already contains -1, the column list
        # below names it twice — confirm against drop_low_abundance_strains.
        d1.reindex(columns=[-1] + _strain_list, fill_value=0).plot(
            kind="bar",
            width=0.95,
            stacked=True,
            color=strain_palette,
            ax=ax,
            edgecolor="k",
            lw=0.5,
        )
        d1.species_rabund.plot(
            kind="line", ax=twin_ax, color="k", marker=".", linestyle="-"
        )
        if _species_rabundB is not None:
            d1.species_rabundB.plot(
                kind="line", ax=twin_ax, color="midnightblue", marker=".", linestyle=":"
            )
        ax.legend_.set_visible(False)
        ax.set_ylim(0, 1)
        ax.set_ylabel("strain fraction")
        ax.set_xlabel("")
        twin_ax.set_ylabel("species relative abundance")
        twin_ax.set_ylim(0)
        lib.plot.rotate_xticklabels(ax)
        ax.set_xlim(-0.5, xlim + 0.5)
    fig.tight_layout()

    if savefig:
        fig.savefig(savefig)

    # Plot colorbars for each subject showing strain abundances.
    if plt_mean_key:
        fig, ax = plt.subplots()
        d2 = d0.groupby("subject_id")[_frac.columns].mean().reindex(_subject_order)
        d2.reindex(columns=[-1] + _strain_list, fill_value=0).plot(
            kind="bar",
            width=0.95,
            stacked=True,
            color=strain_palette,
            ax=ax,
            edgecolor="k",
            lw=0.5,
        )
        ax.legend_.set_visible(False)

In [None]:
def compete_two_fits(_world0, _world1, plot_npos=1000, low_abund_thresh=0.05):
    """Visually compare two strain-deconvolution fits.

    Draws the metagenotype and community plots for a random subset of
    positions, then a 2x2 grid of diagnostics: community entropy of one fit
    against the other, UniFrac distance against metagenotype distance for both
    fits, and the per-sample Spearman correlation between those distances.

    Parameters
    ----------
    _world0, _world1
        The two fits to compare (``sfacts`` World objects in this notebook).
    plot_npos
        Maximum number of positions sampled from `_world0` for plotting.
    low_abund_thresh
        Threshold passed to ``drop_low_abundance_strains`` for both fits.
    """
    w0 = _world0.random_sample(
        position=min(_world0.sizes["position"], plot_npos)
    ).drop_low_abundance_strains(low_abund_thresh)
    # Same positions and samples as w0, filtered with the same threshold.
    w1 = _world1.sel(position=w0.position, sample=w0.sample).drop_low_abundance_strains(
        low_abund_thresh
    )

    # All three plots share w0's metagenotype-based column ordering.
    sf.plot.plot_metagenotype(
        w0, col_linkage_func=lambda w: w0.metagenotype.linkage(optimal_ordering=True)
    )
    sf.plot.plot_community(
        w0,
        col_linkage_func=lambda w: w0.metagenotype.linkage(optimal_ordering=True),
        row_linkage_func=lambda w: w.genotype.linkage(optimal_ordering=True),
    )
    sf.plot.plot_community(
        w1,
        col_linkage_func=lambda w: w0.metagenotype.linkage(optimal_ordering=True),
        row_linkage_func=lambda w: w.genotype.linkage(optimal_ordering=True),
    )

    fig, axs = plt.subplots(2, 2)
    ax = axs[0, 0]
    ax.scatter(w0.community.entropy(), w1.community.entropy())
    ax.plot([0, 2.5], [0, 2.5])

    ax = axs[0, 1]
    ax.scatter(
        squareform(_world0.unifrac_pdist()), squareform(_world0.metagenotype.pdist())
    )
    ax.scatter(
        squareform(_world1.unifrac_pdist()), squareform(_world1.metagenotype.pdist())
    )

    # NOTE(review): both fits are scored against _world0's metagenotype
    # distances — presumably the two worlds share a metagenotype; confirm.
    _mgtp_pdist = _world0.metagenotype.pdist()
    _unifrac_pdist0 = _world0.unifrac_pdist()
    _unifrac_pdist1 = _world1.unifrac_pdist()

    _world0_sample_corr = {}
    _world1_sample_corr = {}
    for sample_name in _world0.sample.values:
        _world0_sample_corr[sample_name] = sp.stats.spearmanr(
            _unifrac_pdist0.loc[sample_name], _mgtp_pdist.loc[sample_name]
        )[0]
        _world1_sample_corr[sample_name] = sp.stats.spearmanr(
            _unifrac_pdist1.loc[sample_name], _mgtp_pdist.loc[sample_name]
        )[0]

    sample_accuracy = pd.DataFrame(
        dict(world0=_world0_sample_corr, world1=_world1_sample_corr)
    )

    ax = axs[1, 0]
    ax.scatter("world0", "world1", data=sample_accuracy)
    ax.plot([0, 1], [0, 1])

### Zotu4

#### s__Escherichia coli_D (Zotu4 -> 102506) (1 of 3)

In [None]:
# Reference species whose genus field mentions Escherichia.
motu_taxonomy.loc[motu_taxonomy["g__"].str.contains("Escherichia")]

In [None]:
# zOTUs whose taxonomy string mentions Escherichia, as (zotu, taxonomy) rows.
rotu_taxonomy.loc[rotu_taxonomy.str.contains("Escherichia")].reset_index().to_numpy()

In [None]:
_species_id = "102506"
_rotu_list = ["Zotu4"]

# Load the strain fit for this species and rename its samples to the "CF_<n>"
# ids used by the metadata tables.
_world = (
    sf.data.World.load(
        f"data/group/een/species/sp-{_species_id}/r.proc.gtpro.sfacts-fit.world.nc",
        validate=False,
    )
    .rename_coords(sample=lambda s: "CF_{}".format(int(s.split("_")[1])))
    .rename_coords(sample={'CF_11': 'CF_15', 'CF_15': 'CF_11'})  # Sample swap
)

print(motu_taxonomy.loc[_species_id])

# Overview: every subject with EEN / PostEEN samples.
strains_in_subjects(
    _species_rabund=motu_rabund[_species_id],
    _species_rabundB=rotu_rabund[_rotu_list].sum(1),
    _world=_world,
    _meta=sample[lambda x: x.diet_or_media.isin(["EEN", "PostEEN"])],
    plt_mean_key=True,
    ncols=3,
)

# Subjects A, B and H, one figure per sample type.  Both panel dimensions are
# forwarded (ax_height was previously unpacked but never used).
for sample_type_list, (ax_width, ax_height) in [(['human'], (8, 4)), (['Fermenter', 'Fermenter_inoculum'], (15, 8)), (['mouse'], (30, 9))]:
    strains_in_subjects(
        _species_rabund=motu_rabund[_species_id],
        _species_rabundB=rotu_rabund[_rotu_list].sum(1),
        _world=_world,
        _meta=sample[lambda x: x.subject_id.isin(["A", "B", "H"]) & x.sample_type.isin(sample_type_list)].assign(
            label=lambda x: x.apply(_label_experiment_sample, axis=1)
        ),
        ncols=3,
        ax_width=ax_width,
        ax_height=ax_height,
    )

In [None]:
# Subject H only, one single-column figure per sample type.  Both panel
# dimensions are forwarded (ax_height was previously unpacked but never used).
for i, (sample_type_list, (ax_width, ax_height)) in enumerate([(['human'], (8, 4)), (['Fermenter', 'Fermenter_inoculum'], (15, 8)), (['mouse'], (30, 9))]):
    strains_in_subjects(
        _species_rabund=motu_rabund[_species_id],
        _species_rabundB=rotu_rabund[_rotu_list].sum(1),
        _world=_world,
        _meta=sample[lambda x: x.subject_id.isin(["H"]) & x.sample_type.isin(sample_type_list)].assign(
            label=lambda x: x.apply(_label_experiment_sample, axis=1)
        ),
        ncols=1,
        ax_width=ax_width,
        ax_height=ax_height,
        # savefig=f'fig/{_species_id}.strain_tracking.{i}.pdf',
    )

In [None]:
# Subjects A, B and H, one single-column figure per sample type, each saved to
# fig/<species>.strain_tracking.<i>.pdf.  Both panel dimensions are forwarded
# (ax_height was previously unpacked but never used).
for i, (sample_type_list, (ax_width, ax_height)) in enumerate([(['human'], (8, 4)), (['Fermenter', 'Fermenter_inoculum'], (15, 8)), (['mouse'], (30, 9))]):
    strains_in_subjects(
        _species_rabund=motu_rabund[_species_id],
        _species_rabundB=rotu_rabund[_rotu_list].sum(1),
        _world=_world,
        _meta=sample[lambda x: x.subject_id.isin(["A", "B", "H"]) & x.sample_type.isin(sample_type_list)].assign(
            label=lambda x: x.apply(_label_experiment_sample, axis=1)
        ),
        ncols=1,
        ax_width=ax_width,
        ax_height=ax_height,
        savefig=f'fig/{_species_id}.strain_tracking.{i}.pdf',
    )

#### s__Klebsiella pneumoniae (Zotu4 -> 102538) (2 of 3)

In [None]:
_species_id = "102538"
_rotu_list = ["Zotu4"]

# Load the strain fit for this species and rename its samples to the "CF_<n>"
# ids used by the metadata tables.
_world = (
    sf.data.World.load(
        f"data/group/een/species/sp-{_species_id}/r.proc.gtpro.sfacts-fit.world.nc",
        validate=False,
    )
    .rename_coords(sample=lambda s: "CF_{}".format(int(s.split("_")[1])))
    .rename_coords(sample={'CF_11': 'CF_15', 'CF_15': 'CF_11'})  # Sample swap
)

print(motu_taxonomy.loc[_species_id])

# Overview: every subject with EEN / PostEEN samples.
strains_in_subjects(
    _species_rabund=motu_rabund[_species_id],
    _species_rabundB=rotu_rabund[_rotu_list].sum(1),
    _world=_world,
    _meta=sample[lambda x: x.diet_or_media.isin(["EEN", "PostEEN"])],
    plt_mean_key=True,
    ncols=3,
)

# Subjects A, B and H, one figure per sample type.  Both panel dimensions are
# forwarded (ax_height was previously unpacked but never used).
for sample_type_list, (ax_width, ax_height) in [(['human'], (8, 4)), (['Fermenter', 'Fermenter_inoculum'], (15, 5)), (['mouse'], (30, 7))]:
    strains_in_subjects(
        _species_rabund=motu_rabund[_species_id],
        _species_rabundB=rotu_rabund[_rotu_list].sum(1),
        _world=_world,
        _meta=sample[lambda x: x.subject_id.isin(["A", "B", "H"]) & x.sample_type.isin(sample_type_list)].assign(
            label=lambda x: x.apply(_label_experiment_sample, axis=1)
        ),
        ncols=3,
        ax_width=ax_width,
        ax_height=ax_height,
    )

#### s__Escherichia marmotae (Zotu4 -> 102322) (3 of 3)

In [None]:
_species_id = "102322"
_rotu_list = ["Zotu4"]

# Load the strain fit for this species and rename its samples to the "CF_<n>"
# ids used by the metadata tables.
_world = (
    sf.data.World.load(
        f"data/group/een/species/sp-{_species_id}/r.proc.gtpro.sfacts-fit.world.nc",
        validate=False,
    )
    .rename_coords(sample=lambda s: "CF_{}".format(int(s.split("_")[1])))
    .rename_coords(sample={'CF_11': 'CF_15', 'CF_15': 'CF_11'})  # Sample swap
)

print(motu_taxonomy.loc[_species_id])

# Overview: every subject with EEN / PostEEN samples.
strains_in_subjects(
    _species_rabund=motu_rabund[_species_id],
    _species_rabundB=rotu_rabund[_rotu_list].sum(1),
    _world=_world,
    _meta=sample[lambda x: x.diet_or_media.isin(["EEN", "PostEEN"])],
    plt_mean_key=True,
    ncols=3,
)

# Subjects A, B and H, one figure per sample type.  Both panel dimensions are
# forwarded (ax_height was previously unpacked but never used).
for sample_type_list, (ax_width, ax_height) in [(['human'], (8, 4)), (['Fermenter', 'Fermenter_inoculum'], (15, 5)), (['mouse'], (30, 7))]:
    strains_in_subjects(
        _species_rabund=motu_rabund[_species_id],
        _species_rabundB=rotu_rabund[_rotu_list].sum(1),
        _world=_world,
        _meta=sample[lambda x: x.subject_id.isin(["A", "B", "H"]) & x.sample_type.isin(sample_type_list)].assign(
            label=lambda x: x.apply(_label_experiment_sample, axis=1)
        ),
        ncols=3,
        ax_width=ax_width,
        ax_height=ax_height,
    )

### s__Enterococcus_D sp002850555 (Zotu85 -> 100323)

In [None]:
_species_id = "100323"
_rotu_list = ["Zotu85"]

# Load the strain fit for this species and rename its samples to the "CF_<n>"
# ids used by the metadata tables.
_world = (
    sf.data.World.load(
        f"data/group/een/species/sp-{_species_id}/r.proc.gtpro.sfacts-fit.world.nc",
        validate=False,
    )
    .rename_coords(sample=lambda s: "CF_{}".format(int(s.split("_")[1])))
    .rename_coords(sample={'CF_11': 'CF_15', 'CF_15': 'CF_11'})  # Sample swap
)

print(motu_taxonomy.loc[_species_id])

# Overview: every subject with EEN / PostEEN samples.
strains_in_subjects(
    _species_rabund=motu_rabund[_species_id],
    _species_rabundB=rotu_rabund[_rotu_list].sum(1),
    _world=_world,
    _meta=sample[lambda x: x.diet_or_media.isin(["EEN", "PostEEN"])],
    plt_mean_key=True,
    ncols=3,
)

# Subjects A, B and H, one figure per sample type.  Both panel dimensions are
# forwarded (ax_height was previously unpacked but never used).
for sample_type_list, (ax_width, ax_height) in [(['human'], (8, 4)), (['Fermenter', 'Fermenter_inoculum'], (15, 5)), (['mouse'], (30, 7))]:
    strains_in_subjects(
        _species_rabund=motu_rabund[_species_id],
        _species_rabundB=rotu_rabund[_rotu_list].sum(1),
        _world=_world,
        _meta=sample[lambda x: x.subject_id.isin(["A", "B", "H"]) & x.sample_type.isin(sample_type_list)].assign(
            label=lambda x: x.apply(_label_experiment_sample, axis=1)
        ),
        ncols=3,
        ax_width=ax_width,
        ax_height=ax_height,
    )

### s__Eggerthella lenta (Zotu172 -> 102544)

In [None]:
_species_id = "102544"
_rotu_list = ["Zotu172"]

# Load the strain fit for this species and rename its samples to the "CF_<n>"
# ids used by the metadata tables.
_world = (
    sf.data.World.load(
        f"data/group/een/species/sp-{_species_id}/r.proc.gtpro.sfacts-fit.world.nc",
        validate=False,
    )
    .rename_coords(sample=lambda s: "CF_{}".format(int(s.split("_")[1])))
    .rename_coords(sample={'CF_11': 'CF_15', 'CF_15': 'CF_11'})  # Sample swap
)

print(motu_taxonomy.loc[_species_id])

# Overview: every subject with EEN / PostEEN samples.
strains_in_subjects(
    _species_rabund=motu_rabund[_species_id],
    _species_rabundB=rotu_rabund[_rotu_list].sum(1),
    _world=_world,
    _meta=sample[lambda x: x.diet_or_media.isin(["EEN", "PostEEN"])],
    plt_mean_key=True,
    ncols=3,
)

# Subjects A, B and H, one figure per sample type.  Both panel dimensions are
# forwarded (ax_height was previously unpacked but never used).
for sample_type_list, (ax_width, ax_height) in [(['human'], (8, 4)), (['Fermenter', 'Fermenter_inoculum'], (15, 5)), (['mouse'], (30, 7))]:
    strains_in_subjects(
        _species_rabund=motu_rabund[_species_id],
        _species_rabundB=rotu_rabund[_rotu_list].sum(1),
        _world=_world,
        _meta=sample[lambda x: x.subject_id.isin(["A", "B", "H"]) & x.sample_type.isin(sample_type_list)].assign(
            label=lambda x: x.apply(_label_experiment_sample, axis=1)
        ),
        ncols=3,
        ax_width=ax_width,
        ax_height=ax_height,
    )

### s__Dorea scindens (Zotu114 -> 101303)

In [None]:
_species_id = "101303"
_rotu_list = ["Zotu114"]

# Load the strain fit for this species and rename its samples to the "CF_<n>"
# ids used by the metadata tables.
_world = (
    sf.data.World.load(
        f"data/group/een/species/sp-{_species_id}/r.proc.gtpro.sfacts-fit.world.nc",
        validate=False,
    )
    .rename_coords(sample=lambda s: "CF_{}".format(int(s.split("_")[1])))
    .rename_coords(sample={'CF_11': 'CF_15', 'CF_15': 'CF_11'})  # Sample swap
)

print(motu_taxonomy.loc[_species_id])

# Overview: every subject with EEN / PostEEN samples.
strains_in_subjects(
    _species_rabund=motu_rabund[_species_id],
    _species_rabundB=rotu_rabund[_rotu_list].sum(1),
    _world=_world,
    _meta=sample[lambda x: x.diet_or_media.isin(["EEN", "PostEEN"])],
    plt_mean_key=True,
    ncols=3,
)

# Subjects A, B and H, one figure per sample type.  Both panel dimensions are
# forwarded (ax_height was previously unpacked but never used).
for sample_type_list, (ax_width, ax_height) in [(['human'], (8, 4)), (['Fermenter', 'Fermenter_inoculum'], (15, 5)), (['mouse'], (30, 7))]:
    strains_in_subjects(
        _species_rabund=motu_rabund[_species_id],
        _species_rabundB=rotu_rabund[_rotu_list].sum(1),
        _world=_world,
        _meta=sample[lambda x: x.subject_id.isin(["A", "B", "H"]) & x.sample_type.isin(sample_type_list)].assign(
            label=lambda x: x.apply(_label_experiment_sample, axis=1)
        ),
        ncols=3,
        ax_width=ax_width,
        ax_height=ax_height,
    )

### s__Clostridium_Q symbiosum (Zotu25 -> 101367)

In [None]:
_species_id = "101367"
_rotu_list = ["Zotu25"]

# Load the strain fit for this species and rename its samples to the "CF_<n>"
# ids used by the metadata tables.
_world = (
    sf.data.World.load(
        f"data/group/een/species/sp-{_species_id}/r.proc.gtpro.sfacts-fit.world.nc",
        validate=False,
    )
    .rename_coords(sample=lambda s: "CF_{}".format(int(s.split("_")[1])))
    .rename_coords(sample={'CF_11': 'CF_15', 'CF_15': 'CF_11'})  # Sample swap
)

print(motu_taxonomy.loc[_species_id])

# Overview: every subject with EEN / PostEEN samples.
strains_in_subjects(
    _species_rabund=motu_rabund[_species_id],
    _species_rabundB=rotu_rabund[_rotu_list].sum(1),
    _world=_world,
    _meta=sample[lambda x: x.diet_or_media.isin(["EEN", "PostEEN"])],
    plt_mean_key=True,
    ncols=3,
)

# Subjects A, B and H, one figure per sample type.  Both panel dimensions are
# forwarded (ax_height was previously unpacked but never used).
for sample_type_list, (ax_width, ax_height) in [(['human'], (8, 4)), (['Fermenter', 'Fermenter_inoculum'], (15, 5)), (['mouse'], (30, 7))]:
    strains_in_subjects(
        _species_rabund=motu_rabund[_species_id],
        _species_rabundB=rotu_rabund[_rotu_list].sum(1),
        _world=_world,
        _meta=sample[lambda x: x.subject_id.isin(["A", "B", "H"]) & x.sample_type.isin(sample_type_list)].assign(
            label=lambda x: x.apply(_label_experiment_sample, axis=1)
        ),
        ncols=3,
        ax_width=ax_width,
        ax_height=ax_height,
    )

### Zotu5

#### s__Clostridium_M clostridioforme (Zotu5 -> 101386) (1 of 2)

In [None]:
_species_id = "101386"
_rotu_list = ["Zotu5"]

# Load the strain fit for this species and rename its samples to the "CF_<n>"
# ids used by the metadata tables.
_world = (
    sf.data.World.load(
        f"data/group/een/species/sp-{_species_id}/r.proc.gtpro.sfacts-fit.world.nc",
        validate=False,
    )
    .rename_coords(sample=lambda s: "CF_{}".format(int(s.split("_")[1])))
    .rename_coords(sample={'CF_11': 'CF_15', 'CF_15': 'CF_11'})  # Sample swap
)

print(motu_taxonomy.loc[_species_id])

# Overview: every subject with EEN / PostEEN samples.
strains_in_subjects(
    _species_rabund=motu_rabund[_species_id],
    _species_rabundB=rotu_rabund[_rotu_list].sum(1),
    _world=_world,
    _meta=sample[lambda x: x.diet_or_media.isin(["EEN", "PostEEN"])],
    plt_mean_key=True,
    ncols=3,
)

# Subjects A, B and H, one figure per sample type.  Both panel dimensions are
# forwarded (ax_height was previously unpacked but never used).
for sample_type_list, (ax_width, ax_height) in [(['human'], (8, 4)), (['Fermenter', 'Fermenter_inoculum'], (15, 5)), (['mouse'], (30, 7))]:
    strains_in_subjects(
        _species_rabund=motu_rabund[_species_id],
        _species_rabundB=rotu_rabund[_rotu_list].sum(1),
        _world=_world,
        _meta=sample[lambda x: x.subject_id.isin(["A", "B", "H"]) & x.sample_type.isin(sample_type_list)].assign(
            label=lambda x: x.apply(_label_experiment_sample, axis=1)
        ),
        ncols=3,
        ax_width=ax_width,
        ax_height=ax_height,
    )

#### s__Clostridium_M bolteae (Zotu5 -> 101493) (2 of 2)

In [None]:
_species_id = "101493"
_rotu_list = ["Zotu5"]

# Load the strain fit for this species and rename its samples to the "CF_<n>"
# ids used by the metadata tables.
_world = (
    sf.data.World.load(
        f"data/group/een/species/sp-{_species_id}/r.proc.gtpro.sfacts-fit.world.nc",
        validate=False,
    )
    .rename_coords(sample=lambda s: "CF_{}".format(int(s.split("_")[1])))
    .rename_coords(sample={'CF_11': 'CF_15', 'CF_15': 'CF_11'})  # Sample swap
)

print(motu_taxonomy.loc[_species_id])

# Overview: every subject with EEN / PostEEN samples.
strains_in_subjects(
    _species_rabund=motu_rabund[_species_id],
    _species_rabundB=rotu_rabund[_rotu_list].sum(1),
    _world=_world,
    _meta=sample[lambda x: x.diet_or_media.isin(["EEN", "PostEEN"])],
    plt_mean_key=True,
    ncols=3,
)

# Subjects A, B and H, one figure per sample type.  Both panel dimensions are
# forwarded (ax_height was previously unpacked but never used).
for sample_type_list, (ax_width, ax_height) in [(['human'], (8, 4)), (['Fermenter', 'Fermenter_inoculum'], (15, 5)), (['mouse'], (30, 7))]:
    strains_in_subjects(
        _species_rabund=motu_rabund[_species_id],
        _species_rabundB=rotu_rabund[_rotu_list].sum(1),
        _world=_world,
        _meta=sample[lambda x: x.subject_id.isin(["A", "B", "H"]) & x.sample_type.isin(sample_type_list)].assign(
            label=lambda x: x.apply(_label_experiment_sample, axis=1)
        ),
        ncols=3,
        ax_width=ax_width,
        ax_height=ax_height,
    )

### s__Hungatella effluvii (Zotu10 -> 100032)

In [None]:
_species_id = "100032"
_rotu_list = ["Zotu10"]

# Load the strain fit for this species and rename its samples to the "CF_<n>"
# ids used by the metadata tables.
_world = (
    sf.data.World.load(
        f"data/group/een/species/sp-{_species_id}/r.proc.gtpro.sfacts-fit.world.nc",
        validate=False,
    )
    .rename_coords(sample=lambda s: "CF_{}".format(int(s.split("_")[1])))
    .rename_coords(sample={'CF_11': 'CF_15', 'CF_15': 'CF_11'})  # Sample swap
)

print(motu_taxonomy.loc[_species_id])

# Overview: every subject with EEN / PostEEN samples.
strains_in_subjects(
    _species_rabund=motu_rabund[_species_id],
    _species_rabundB=rotu_rabund[_rotu_list].sum(1),
    _world=_world,
    _meta=sample[lambda x: x.diet_or_media.isin(["EEN", "PostEEN"])],
    plt_mean_key=True,
    ncols=3,
)

# Subjects A, B and H, one figure per sample type.  Both panel dimensions are
# forwarded (ax_height was previously unpacked but never used).
for sample_type_list, (ax_width, ax_height) in [(['human'], (8, 4)), (['Fermenter', 'Fermenter_inoculum'], (15, 5)), (['mouse'], (30, 7))]:
    strains_in_subjects(
        _species_rabund=motu_rabund[_species_id],
        _species_rabundB=rotu_rabund[_rotu_list].sum(1),
        _world=_world,
        _meta=sample[lambda x: x.subject_id.isin(["A", "B", "H"]) & x.sample_type.isin(sample_type_list)].assign(
            label=lambda x: x.apply(_label_experiment_sample, axis=1)
        ),
        ncols=3,
        ax_width=ax_width,
        ax_height=ax_height,
    )

### s__Hungatella hathewayi (Zotu160 -> 100150)

In [None]:
_species_id = "100150"
_rotu_list = ["Zotu160"]

# Load the strain fit for this species and rename its samples to the "CF_<n>"
# ids used by the metadata tables.
_world = (
    sf.data.World.load(
        f"data/group/een/species/sp-{_species_id}/r.proc.gtpro.sfacts-fit.world.nc",
        validate=False,
    )
    .rename_coords(sample=lambda s: "CF_{}".format(int(s.split("_")[1])))
    .rename_coords(sample={'CF_11': 'CF_15', 'CF_15': 'CF_11'})  # Sample swap
)

print(motu_taxonomy.loc[_species_id])

# Overview: every subject with EEN / PostEEN samples.
strains_in_subjects(
    _species_rabund=motu_rabund[_species_id],
    _species_rabundB=rotu_rabund[_rotu_list].sum(1),
    _world=_world,
    _meta=sample[lambda x: x.diet_or_media.isin(["EEN", "PostEEN"])],
    plt_mean_key=True,
    ncols=3,
)

# Subjects A, B and H, one figure per sample type.  Both panel dimensions are
# forwarded (ax_height was previously unpacked but never used).
for sample_type_list, (ax_width, ax_height) in [(['human'], (8, 4)), (['Fermenter', 'Fermenter_inoculum'], (15, 5)), (['mouse'], (30, 7))]:
    strains_in_subjects(
        _species_rabund=motu_rabund[_species_id],
        _species_rabundB=rotu_rabund[_rotu_list].sum(1),
        _world=_world,
        _meta=sample[lambda x: x.subject_id.isin(["A", "B", "H"]) & x.sample_type.isin(sample_type_list)].assign(
            label=lambda x: x.apply(_label_experiment_sample, axis=1)
        ),
        ncols=3,
        ax_width=ax_width,
        ax_height=ax_height,
    )

### s__Clostridium_M sp-? (Zotu31 -> 100179)

In [None]:
# s__Clostridium_M sp-?: species 100179 paired with Zotu31.
_species_id = "100179"
_rotu_list = ["Zotu31"]

_world_path = (
    f"data/group/een/species/sp-{_species_id}/r.proc.gtpro.sfacts-fit.world.nc"
)
# Sample swap: the CF_11 and CF_15 labels are exchanged after renaming.
_sample_swap = {"CF_11": "CF_15", "CF_15": "CF_11"}


def _cf_sample_name(s):
    """Return "CF_<n>", with <n> the integer in the second "_"-separated field."""
    return "CF_{}".format(int(s.split("_")[1]))


# Load the fitted world for this species, then relabel its sample coordinate.
_world = (
    sf.data.World.load(_world_path, validate=False)
    .rename_coords(sample=_cf_sample_name)
    .rename_coords(sample=_sample_swap)
)

print(motu_taxonomy.loc[_species_id])

# First call: every sample whose diet_or_media is EEN or PostEEN.
strains_in_subjects(
    _species_rabund=motu_rabund[_species_id],
    # Sum over axis 1 of the listed Zotu columns.
    _species_rabundB=rotu_rabund[_rotu_list].sum(1),
    _world=_world,
    _meta=sample[sample.diet_or_media.isin(["EEN", "PostEEN"])],
    plt_mean_key=True,
    ncols=3,
)

# One further call per sample-type group, restricted to subjects A, B and H.
# NOTE(review): ax_height is unpacked but never passed on — confirm whether
# strains_in_subjects should receive it.
_panel_specs = [
    (["human"], (8, 4)),
    (["Fermenter", "Fermenter_inoculum"], (15, 5)),
    (["mouse"], (30, 7)),
]
for sample_type_list, (ax_width, ax_height) in _panel_specs:
    _in_group = sample.subject_id.isin(["A", "B", "H"]) & sample.sample_type.isin(
        sample_type_list
    )
    strains_in_subjects(
        _species_rabund=motu_rabund[_species_id],
        _species_rabundB=rotu_rabund[_rotu_list].sum(1),
        _world=_world,
        _meta=sample[_in_group].assign(
            label=lambda x: x.apply(_label_experiment_sample, axis=1)
        ),
        ncols=3,
        ax_width=ax_width,
    )

### s__Clostridium_M sp000431375 (Zotu155 -> 100242)

In [None]:
# s__Clostridium_M sp000431375: species 100242 paired with Zotu155.
_species_id = "100242"
_rotu_list = ["Zotu155"]

_world_path = (
    f"data/group/een/species/sp-{_species_id}/r.proc.gtpro.sfacts-fit.world.nc"
)
# Sample swap: the CF_11 and CF_15 labels are exchanged after renaming.
_sample_swap = {"CF_11": "CF_15", "CF_15": "CF_11"}


def _cf_sample_name(s):
    """Return "CF_<n>", with <n> the integer in the second "_"-separated field."""
    return "CF_{}".format(int(s.split("_")[1]))


# Load the fitted world for this species, then relabel its sample coordinate.
_world = (
    sf.data.World.load(_world_path, validate=False)
    .rename_coords(sample=_cf_sample_name)
    .rename_coords(sample=_sample_swap)
)

print(motu_taxonomy.loc[_species_id])

# First call: every sample whose diet_or_media is EEN or PostEEN.
strains_in_subjects(
    _species_rabund=motu_rabund[_species_id],
    # Sum over axis 1 of the listed Zotu columns.
    _species_rabundB=rotu_rabund[_rotu_list].sum(1),
    _world=_world,
    _meta=sample[sample.diet_or_media.isin(["EEN", "PostEEN"])],
    plt_mean_key=True,
    ncols=3,
)

# One further call per sample-type group, restricted to subjects A, B and H.
# NOTE(review): ax_height is unpacked but never passed on — confirm whether
# strains_in_subjects should receive it.
_panel_specs = [
    (["human"], (8, 4)),
    (["Fermenter", "Fermenter_inoculum"], (15, 5)),
    (["mouse"], (30, 7)),
]
for sample_type_list, (ax_width, ax_height) in _panel_specs:
    _in_group = sample.subject_id.isin(["A", "B", "H"]) & sample.sample_type.isin(
        sample_type_list
    )
    strains_in_subjects(
        _species_rabund=motu_rabund[_species_id],
        _species_rabundB=rotu_rabund[_rotu_list].sum(1),
        _world=_world,
        _meta=sample[_in_group].assign(
            label=lambda x: x.apply(_label_experiment_sample, axis=1)
        ),
        ncols=3,
        ax_width=ax_width,
    )

### s__Erysipelatoclostridium ramosum (Zotu46 -> 101400)

In [None]:
# s__Erysipelatoclostridium ramosum: species 101400 paired with Zotu46.
_species_id = "101400"
_rotu_list = ["Zotu46"]

_world_path = (
    f"data/group/een/species/sp-{_species_id}/r.proc.gtpro.sfacts-fit.world.nc"
)
# Sample swap: the CF_11 and CF_15 labels are exchanged after renaming.
_sample_swap = {"CF_11": "CF_15", "CF_15": "CF_11"}


def _cf_sample_name(s):
    """Return "CF_<n>", with <n> the integer in the second "_"-separated field."""
    return "CF_{}".format(int(s.split("_")[1]))


# Load the fitted world for this species, then relabel its sample coordinate.
_world = (
    sf.data.World.load(_world_path, validate=False)
    .rename_coords(sample=_cf_sample_name)
    .rename_coords(sample=_sample_swap)
)

print(motu_taxonomy.loc[_species_id])

# First call: every sample whose diet_or_media is EEN or PostEEN.
strains_in_subjects(
    _species_rabund=motu_rabund[_species_id],
    # Sum over axis 1 of the listed Zotu columns.
    _species_rabundB=rotu_rabund[_rotu_list].sum(1),
    _world=_world,
    _meta=sample[sample.diet_or_media.isin(["EEN", "PostEEN"])],
    plt_mean_key=True,
    ncols=3,
)

# One further call per sample-type group, restricted to subjects A, B and H.
# NOTE(review): ax_height is unpacked but never passed on — confirm whether
# strains_in_subjects should receive it.
_panel_specs = [
    (["human"], (8, 4)),
    (["Fermenter", "Fermenter_inoculum"], (15, 5)),
    (["mouse"], (30, 7)),
]
for sample_type_list, (ax_width, ax_height) in _panel_specs:
    _in_group = sample.subject_id.isin(["A", "B", "H"]) & sample.sample_type.isin(
        sample_type_list
    )
    strains_in_subjects(
        _species_rabund=motu_rabund[_species_id],
        _species_rabundB=rotu_rabund[_rotu_list].sum(1),
        _world=_world,
        _meta=sample[_in_group].assign(
            label=lambda x: x.apply(_label_experiment_sample, axis=1)
        ),
        ncols=3,
        ax_width=ax_width,
    )

### s__Flavonifractor plautii (Zotu49 -> 100099)

In [None]:
# s__Flavonifractor plautii: species 100099 paired with Zotu49.
_species_id = "100099"
# Zotu49 per this section's heading; the list previously held Zotu46, which is
# the rOTU used for species 101400 in the preceding cell (copy-paste slip).
_rotu_list = ["Zotu49"]

_world_path = (
    f"data/group/een/species/sp-{_species_id}/r.proc.gtpro.sfacts-fit.world.nc"
)
# Sample swap: the CF_11 and CF_15 labels are exchanged after renaming.
_sample_swap = {"CF_11": "CF_15", "CF_15": "CF_11"}


def _cf_sample_name(s):
    """Return "CF_<n>", with <n> the integer in the second "_"-separated field."""
    return "CF_{}".format(int(s.split("_")[1]))


# Load the fitted world for this species, then relabel its sample coordinate.
_world = (
    sf.data.World.load(_world_path, validate=False)
    .rename_coords(sample=_cf_sample_name)
    .rename_coords(sample=_sample_swap)
)

print(motu_taxonomy.loc[_species_id])

# First call: every sample whose diet_or_media is EEN or PostEEN.
strains_in_subjects(
    _species_rabund=motu_rabund[_species_id],
    # Sum over axis 1 of the listed Zotu columns.
    _species_rabundB=rotu_rabund[_rotu_list].sum(1),
    _world=_world,
    _meta=sample[sample.diet_or_media.isin(["EEN", "PostEEN"])],
    plt_mean_key=True,
    ncols=3,
)

# One further call per sample-type group, restricted to subjects A, B and H.
# NOTE(review): ax_height is unpacked but never passed on — confirm whether
# strains_in_subjects should receive it.
_panel_specs = [
    (["human"], (8, 4)),
    (["Fermenter", "Fermenter_inoculum"], (15, 5)),
    (["mouse"], (30, 7)),
]
for sample_type_list, (ax_width, ax_height) in _panel_specs:
    _in_group = sample.subject_id.isin(["A", "B", "H"]) & sample.sample_type.isin(
        sample_type_list
    )
    strains_in_subjects(
        _species_rabund=motu_rabund[_species_id],
        _species_rabundB=rotu_rabund[_rotu_list].sum(1),
        _world=_world,
        _meta=sample[_in_group].assign(
            label=lambda x: x.apply(_label_experiment_sample, axis=1)
        ),
        ncols=3,
        ax_width=ax_width,
    )

### s__Eisenbergiella tayi (Zotu75 -> 102330)

In [None]:
# s__Eisenbergiella tayi: species 102330 paired with Zotu75.
_species_id = "102330"
_rotu_list = ["Zotu75"]

_world_path = (
    f"data/group/een/species/sp-{_species_id}/r.proc.gtpro.sfacts-fit.world.nc"
)
# Sample swap: the CF_11 and CF_15 labels are exchanged after renaming.
_sample_swap = {"CF_11": "CF_15", "CF_15": "CF_11"}


def _cf_sample_name(s):
    """Return "CF_<n>", with <n> the integer in the second "_"-separated field."""
    return "CF_{}".format(int(s.split("_")[1]))


# Load the fitted world for this species, then relabel its sample coordinate.
_world = (
    sf.data.World.load(_world_path, validate=False)
    .rename_coords(sample=_cf_sample_name)
    .rename_coords(sample=_sample_swap)
)

print(motu_taxonomy.loc[_species_id])

# First call: every sample whose diet_or_media is EEN or PostEEN.
strains_in_subjects(
    _species_rabund=motu_rabund[_species_id],
    # Sum over axis 1 of the listed Zotu columns.
    _species_rabundB=rotu_rabund[_rotu_list].sum(1),
    _world=_world,
    _meta=sample[sample.diet_or_media.isin(["EEN", "PostEEN"])],
    plt_mean_key=True,
    ncols=3,
)

# One further call per sample-type group, restricted to subjects A, B and H.
# NOTE(review): ax_height is unpacked but never passed on — confirm whether
# strains_in_subjects should receive it.
_panel_specs = [
    (["human"], (8, 4)),
    (["Fermenter", "Fermenter_inoculum"], (15, 5)),
    (["mouse"], (30, 7)),
]
for sample_type_list, (ax_width, ax_height) in _panel_specs:
    _in_group = sample.subject_id.isin(["A", "B", "H"]) & sample.sample_type.isin(
        sample_type_list
    )
    strains_in_subjects(
        _species_rabund=motu_rabund[_species_id],
        _species_rabundB=rotu_rabund[_rotu_list].sum(1),
        _world=_world,
        _meta=sample[_in_group].assign(
            label=lambda x: x.apply(_label_experiment_sample, axis=1)
        ),
        ncols=3,
        ax_width=ax_width,
    )

### s__Ruthenibacterium lactatiformans (Zotu78 -> 103682)

In [None]:
# s__Ruthenibacterium lactatiformans: species 103682 paired with Zotu78.
_species_id = "103682"
_rotu_list = ["Zotu78"]

_world_path = (
    f"data/group/een/species/sp-{_species_id}/r.proc.gtpro.sfacts-fit.world.nc"
)
# Sample swap: the CF_11 and CF_15 labels are exchanged after renaming.
_sample_swap = {"CF_11": "CF_15", "CF_15": "CF_11"}


def _cf_sample_name(s):
    """Return "CF_<n>", with <n> the integer in the second "_"-separated field."""
    return "CF_{}".format(int(s.split("_")[1]))


# Load the fitted world for this species, then relabel its sample coordinate.
_world = (
    sf.data.World.load(_world_path, validate=False)
    .rename_coords(sample=_cf_sample_name)
    .rename_coords(sample=_sample_swap)
)

print(motu_taxonomy.loc[_species_id])

# First call: every sample whose diet_or_media is EEN or PostEEN.
strains_in_subjects(
    _species_rabund=motu_rabund[_species_id],
    # Sum over axis 1 of the listed Zotu columns.
    _species_rabundB=rotu_rabund[_rotu_list].sum(1),
    _world=_world,
    _meta=sample[sample.diet_or_media.isin(["EEN", "PostEEN"])],
    plt_mean_key=True,
    ncols=3,
)

# One further call per sample-type group, restricted to subjects A, B and H.
# NOTE(review): ax_height is unpacked but never passed on — confirm whether
# strains_in_subjects should receive it.
_panel_specs = [
    (["human"], (8, 4)),
    (["Fermenter", "Fermenter_inoculum"], (15, 5)),
    (["mouse"], (30, 7)),
]
for sample_type_list, (ax_width, ax_height) in _panel_specs:
    _in_group = sample.subject_id.isin(["A", "B", "H"]) & sample.sample_type.isin(
        sample_type_list
    )
    strains_in_subjects(
        _species_rabund=motu_rabund[_species_id],
        _species_rabundB=rotu_rabund[_rotu_list].sum(1),
        _world=_world,
        _meta=sample[_in_group].assign(
            label=lambda x: x.apply(_label_experiment_sample, axis=1)
        ),
        ncols=3,
        ax_width=ax_width,
    )

### s__Blautia_A wexlerae (Zotu13 -> 101338)

In [None]:
# s__Blautia_A wexlerae: species 101338 paired with Zotu13.
_species_id = "101338"
_rotu_list = ["Zotu13"]

_world_path = (
    f"data/group/een/species/sp-{_species_id}/r.proc.gtpro.sfacts-fit.world.nc"
)
# Sample swap: the CF_11 and CF_15 labels are exchanged after renaming.
_sample_swap = {"CF_11": "CF_15", "CF_15": "CF_11"}


def _cf_sample_name(s):
    """Return "CF_<n>", with <n> the integer in the second "_"-separated field."""
    return "CF_{}".format(int(s.split("_")[1]))


# Load the fitted world for this species, then relabel its sample coordinate.
_world = (
    sf.data.World.load(_world_path, validate=False)
    .rename_coords(sample=_cf_sample_name)
    .rename_coords(sample=_sample_swap)
)

print(motu_taxonomy.loc[_species_id])

# First call: every sample whose diet_or_media is EEN or PostEEN.
strains_in_subjects(
    _species_rabund=motu_rabund[_species_id],
    # Sum over axis 1 of the listed Zotu columns.
    _species_rabundB=rotu_rabund[_rotu_list].sum(1),
    _world=_world,
    _meta=sample[sample.diet_or_media.isin(["EEN", "PostEEN"])],
    plt_mean_key=True,
    ncols=3,
)

# One further call per sample-type group, restricted to subjects A, B and H.
# NOTE(review): ax_height is unpacked but never passed on — confirm whether
# strains_in_subjects should receive it.
_panel_specs = [
    (["human"], (8, 4)),
    (["Fermenter", "Fermenter_inoculum"], (15, 5)),
    (["mouse"], (30, 7)),
]
for sample_type_list, (ax_width, ax_height) in _panel_specs:
    _in_group = sample.subject_id.isin(["A", "B", "H"]) & sample.sample_type.isin(
        sample_type_list
    )
    strains_in_subjects(
        _species_rabund=motu_rabund[_species_id],
        _species_rabundB=rotu_rabund[_rotu_list].sum(1),
        _world=_world,
        _meta=sample[_in_group].assign(
            label=lambda x: x.apply(_label_experiment_sample, axis=1)
        ),
        ncols=3,
        ax_width=ax_width,
    )

### s__Bacteroides caccae (Zotu74 -> 102549)

In [None]:
# s__Bacteroides caccae: species 102549 paired with Zotu74.
_species_id = "102549"
_rotu_list = ["Zotu74"]

_world_path = (
    f"data/group/een/species/sp-{_species_id}/r.proc.gtpro.sfacts-fit.world.nc"
)
# Sample swap: the CF_11 and CF_15 labels are exchanged after renaming.
_sample_swap = {"CF_11": "CF_15", "CF_15": "CF_11"}


def _cf_sample_name(s):
    """Return "CF_<n>", with <n> the integer in the second "_"-separated field."""
    return "CF_{}".format(int(s.split("_")[1]))


# Load the fitted world for this species, then relabel its sample coordinate.
_world = (
    sf.data.World.load(_world_path, validate=False)
    .rename_coords(sample=_cf_sample_name)
    .rename_coords(sample=_sample_swap)
)

print(motu_taxonomy.loc[_species_id])

# First call: every sample whose diet_or_media is EEN or PostEEN.
strains_in_subjects(
    _species_rabund=motu_rabund[_species_id],
    # Sum over axis 1 of the listed Zotu columns.
    _species_rabundB=rotu_rabund[_rotu_list].sum(1),
    _world=_world,
    _meta=sample[sample.diet_or_media.isin(["EEN", "PostEEN"])],
    plt_mean_key=True,
    ncols=3,
)

# One further call per sample-type group, restricted to subjects A, B and H.
# NOTE(review): ax_height is unpacked but never passed on — confirm whether
# strains_in_subjects should receive it.
_panel_specs = [
    (["human"], (8, 4)),
    (["Fermenter", "Fermenter_inoculum"], (15, 5)),
    (["mouse"], (30, 7)),
]
for sample_type_list, (ax_width, ax_height) in _panel_specs:
    _in_group = sample.subject_id.isin(["A", "B", "H"]) & sample.sample_type.isin(
        sample_type_list
    )
    strains_in_subjects(
        _species_rabund=motu_rabund[_species_id],
        _species_rabundB=rotu_rabund[_rotu_list].sum(1),
        _world=_world,
        _meta=sample[_in_group].assign(
            label=lambda x: x.apply(_label_experiment_sample, axis=1)
        ),
        ncols=3,
        ax_width=ax_width,
    )

### s__Bacteroides dorei (Zotu1 -> 102478)

In [None]:
# s__Bacteroides dorei: species 102478 paired with Zotu1.
_species_id = "102478"
_rotu_list = ["Zotu1"]

_world_path = (
    f"data/group/een/species/sp-{_species_id}/r.proc.gtpro.sfacts-fit.world.nc"
)
# Sample swap: the CF_11 and CF_15 labels are exchanged after renaming.
_sample_swap = {"CF_11": "CF_15", "CF_15": "CF_11"}


def _cf_sample_name(s):
    """Return "CF_<n>", with <n> the integer in the second "_"-separated field."""
    return "CF_{}".format(int(s.split("_")[1]))


# Load the fitted world for this species, then relabel its sample coordinate.
_world = (
    sf.data.World.load(_world_path, validate=False)
    .rename_coords(sample=_cf_sample_name)
    .rename_coords(sample=_sample_swap)
)

print(motu_taxonomy.loc[_species_id])

# First call: every sample whose diet_or_media is EEN or PostEEN.
strains_in_subjects(
    _species_rabund=motu_rabund[_species_id],
    # Sum over axis 1 of the listed Zotu columns.
    _species_rabundB=rotu_rabund[_rotu_list].sum(1),
    _world=_world,
    _meta=sample[sample.diet_or_media.isin(["EEN", "PostEEN"])],
    plt_mean_key=True,
    ncols=3,
)

# One further call per sample-type group, restricted to subjects A, B and H.
# NOTE(review): ax_height is unpacked but never passed on — confirm whether
# strains_in_subjects should receive it.
_panel_specs = [
    (["human"], (8, 4)),
    (["Fermenter", "Fermenter_inoculum"], (15, 5)),
    (["mouse"], (30, 7)),
]
for sample_type_list, (ax_width, ax_height) in _panel_specs:
    _in_group = sample.subject_id.isin(["A", "B", "H"]) & sample.sample_type.isin(
        sample_type_list
    )
    strains_in_subjects(
        _species_rabund=motu_rabund[_species_id],
        _species_rabundB=rotu_rabund[_rotu_list].sum(1),
        _world=_world,
        _meta=sample[_in_group].assign(
            label=lambda x: x.apply(_label_experiment_sample, axis=1)
        ),
        ncols=3,
        ax_width=ax_width,
    )

### s__Bacteroides uniformis (Zotu6 -> 101346)

In [None]:
# s__Bacteroides uniformis: species 101346 paired with Zotu6.
_species_id = "101346"
_rotu_list = ["Zotu6"]

_world_path = (
    f"data/group/een/species/sp-{_species_id}/r.proc.gtpro.sfacts-fit.world.nc"
)
# Sample swap: the CF_11 and CF_15 labels are exchanged after renaming.
_sample_swap = {"CF_11": "CF_15", "CF_15": "CF_11"}


def _cf_sample_name(s):
    """Return "CF_<n>", with <n> the integer in the second "_"-separated field."""
    return "CF_{}".format(int(s.split("_")[1]))


# Load the fitted world for this species, then relabel its sample coordinate.
_world = (
    sf.data.World.load(_world_path, validate=False)
    .rename_coords(sample=_cf_sample_name)
    .rename_coords(sample=_sample_swap)
)

print(motu_taxonomy.loc[_species_id])

# First call: every sample whose diet_or_media is EEN or PostEEN.
strains_in_subjects(
    _species_rabund=motu_rabund[_species_id],
    # Sum over axis 1 of the listed Zotu columns.
    _species_rabundB=rotu_rabund[_rotu_list].sum(1),
    _world=_world,
    _meta=sample[sample.diet_or_media.isin(["EEN", "PostEEN"])],
    plt_mean_key=True,
    ncols=3,
)

# One further call per sample-type group, restricted to subjects A, B and H.
# NOTE(review): ax_height is unpacked but never passed on — confirm whether
# strains_in_subjects should receive it.
_panel_specs = [
    (["human"], (8, 4)),
    (["Fermenter", "Fermenter_inoculum"], (15, 5)),
    (["mouse"], (30, 7)),
]
for sample_type_list, (ax_width, ax_height) in _panel_specs:
    _in_group = sample.subject_id.isin(["A", "B", "H"]) & sample.sample_type.isin(
        sample_type_list
    )
    strains_in_subjects(
        _species_rabund=motu_rabund[_species_id],
        _species_rabundB=rotu_rabund[_rotu_list].sum(1),
        _world=_world,
        _meta=sample[_in_group].assign(
            label=lambda x: x.apply(_label_experiment_sample, axis=1)
        ),
        ncols=3,
        ax_width=ax_width,
    )

### s__Bacteroides ovatus (Zotu1 -> 101378 -> Zotu14+Zotu23)

In [None]:
# s__Bacteroides ovatus: species 101378 paired with Zotu14 + Zotu23.
_species_id = "101378"
_rotu_list = ["Zotu14", "Zotu23"]

_world_path = (
    f"data/group/een/species/sp-{_species_id}/r.proc.gtpro.sfacts-fit.world.nc"
)
# Sample swap: the CF_11 and CF_15 labels are exchanged after renaming.
_sample_swap = {"CF_11": "CF_15", "CF_15": "CF_11"}


def _cf_sample_name(s):
    """Return "CF_<n>", with <n> the integer in the second "_"-separated field."""
    return "CF_{}".format(int(s.split("_")[1]))


# Load the fitted world for this species, then relabel its sample coordinate.
_world = (
    sf.data.World.load(_world_path, validate=False)
    .rename_coords(sample=_cf_sample_name)
    .rename_coords(sample=_sample_swap)
)

print(motu_taxonomy.loc[_species_id])

# First call: every sample whose diet_or_media is EEN or PostEEN.
strains_in_subjects(
    _species_rabund=motu_rabund[_species_id],
    # Sum over axis 1 of the listed Zotu columns.
    _species_rabundB=rotu_rabund[_rotu_list].sum(1),
    _world=_world,
    _meta=sample[sample.diet_or_media.isin(["EEN", "PostEEN"])],
    plt_mean_key=True,
    ncols=3,
)

# One further call per sample-type group, restricted to subjects A, B and H.
# NOTE(review): ax_height is unpacked but never passed on — confirm whether
# strains_in_subjects should receive it.
_panel_specs = [
    (["human"], (8, 4)),
    (["Fermenter", "Fermenter_inoculum"], (15, 5)),
    (["mouse"], (30, 7)),
]
for sample_type_list, (ax_width, ax_height) in _panel_specs:
    _in_group = sample.subject_id.isin(["A", "B", "H"]) & sample.sample_type.isin(
        sample_type_list
    )
    strains_in_subjects(
        _species_rabund=motu_rabund[_species_id],
        _species_rabundB=rotu_rabund[_rotu_list].sum(1),
        _world=_world,
        _meta=sample[_in_group].assign(
            label=lambda x: x.apply(_label_experiment_sample, axis=1)
        ),
        ncols=3,
        ax_width=ax_width,
    )

### s__Bilophila wadsworthia (101359 -> Zotu98+Zotu136)

In [None]:
# s__Bilophila wadsworthia: species 101359 paired with Zotu98 + Zotu136.
_species_id = "101359"
_rotu_list = ["Zotu98", "Zotu136"]

_world_path = (
    f"data/group/een/species/sp-{_species_id}/r.proc.gtpro.sfacts-fit.world.nc"
)
# Sample swap: the CF_11 and CF_15 labels are exchanged after renaming.
_sample_swap = {"CF_11": "CF_15", "CF_15": "CF_11"}


def _cf_sample_name(s):
    """Return "CF_<n>", with <n> the integer in the second "_"-separated field."""
    return "CF_{}".format(int(s.split("_")[1]))


# Load the fitted world for this species, apply drop_low_abundance_strains
# with a 0.01 argument, then relabel the sample coordinate.
_world = (
    sf.data.World.load(_world_path, validate=False)
    .drop_low_abundance_strains(0.01)
    .rename_coords(sample=_cf_sample_name)
    .rename_coords(sample=_sample_swap)
)

print(motu_taxonomy.loc[_species_id])

# First call: every sample whose diet_or_media is EEN or PostEEN.
strains_in_subjects(
    _species_rabund=motu_rabund[_species_id],
    # Sum over axis 1 of the listed Zotu columns.
    _species_rabundB=rotu_rabund[_rotu_list].sum(1),
    _world=_world,
    _meta=sample[sample.diet_or_media.isin(["EEN", "PostEEN"])],
    plt_mean_key=True,
    ncols=3,
)

# One further call per sample-type group, restricted to subjects A, B and H.
# NOTE(review): ax_height is unpacked but never passed on — confirm whether
# strains_in_subjects should receive it.
_panel_specs = [
    (["human"], (8, 4)),
    (["Fermenter", "Fermenter_inoculum"], (15, 5)),
    (["mouse"], (30, 7)),
]
for sample_type_list, (ax_width, ax_height) in _panel_specs:
    _in_group = sample.subject_id.isin(["A", "B", "H"]) & sample.sample_type.isin(
        sample_type_list
    )
    strains_in_subjects(
        _species_rabund=motu_rabund[_species_id],
        _species_rabundB=rotu_rabund[_rotu_list].sum(1),
        _world=_world,
        _meta=sample[_in_group].assign(
            label=lambda x: x.apply(_label_experiment_sample, axis=1)
        ),
        ncols=3,
        ax_width=ax_width,
    )