In [None]:
%matplotlib inline

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from random import sample
from glob import glob
from scipy.spatial.distance import pdist

In [None]:
# Use seaborn's 'deep' colour palette as the default for the plots drawn below.
sns.set_palette('deep')

In [None]:
# Load the tab-separated table from emp_150bp_filtered.tsv. Later cells read its
# "sample", "org_id" and "value" columns.
# NOTE(review): presumably one row per (sample, organism) with a relative
# abundance in "value" -- confirm against the data file.
df = pd.read_csv("../data/emp_150bp_filtered.tsv", sep="\t")

In [None]:
# Sample metadata. Only six columns are read (selected by position), the
# '#SampleID' column is renamed to 'sample' so it can be joined with the
# abundance table, and the collection timestamp is parsed into a datetime
# "time" column.
metadata = (
    pd.read_csv(
        '../data/emp_qiime_mapping_qc_filtered.tsv',
        sep='\t',
        usecols=[0, 4, 5, 6, 43, 44],
    )
    .rename(columns={'#SampleID': 'sample'})
    .assign(time=lambda table: pd.to_datetime(table["collection_timestamp"]))
)

In [None]:
# Restrict the metadata to the samples whose study_id is 2192.
study = metadata.query("study_id == 2192")

def func(x):
    """Annotate one sample row with its host, host type and sampling location.

    The sample id (``x["sample"]``) is split on '.' and, depending on
    ``x["host_species"]``, three keys are written into ``x``:

    * human (``s__Homo_sapiens``): host is the host subject id, loc is the
      third token of the sample id;
    * pet (``s__Felis_catus`` / ``s__Canis_lupus``): host is the host subject
      id, loc is the fourth token;
    * anything else ("house"): host is the second token, loc is the tokens
      from the third up to (not including) the last four, joined with '_'.

    The row is modified in place and returned.
    """
    parts = x["sample"].split('.')
    species = x["host_species"]

    if species == 's__Homo_sapiens':
        x["host"] = x["host_subject_id"]
        x["host_type"] = "human"
        x["loc"] = parts[2]
        return x

    if species in {'s__Felis_catus', 's__Canis_lupus'}:
        x["host_type"] = "pet"
        x["host"] = x["host_subject_id"]
        x["loc"] = parts[3]
        return x

    # Neither human nor pet: the sample is attributed to the house itself.
    x["host_type"] = "house"
    x["host"] = parts[1]
    x["loc"] = "_".join(parts[2:-4])
    return x

# Annotate every row with host / host_type / loc, then keep only the
# (host, loc) series that contain at least 10 samples.
study = (
    study
    .apply(func, axis=1)
    .groupby(["host", "loc"], as_index=False)
    .filter(lambda series: len(series) >= 10)
)

In [None]:
def func(x):
    """Replace the "time" column of ``x`` by whole days since its earliest entry.

    ``x`` is modified in place and returned.
    """
    elapsed = x["time"] - x["time"].min()
    x["time"] = elapsed.apply(lambda delta: delta.days)
    return x

# Re-express "time" as whole days since the first sample of each (host, loc)
# series. This yields the same values as applying `func` to every group, but a
# grouped transform keeps the original flat index and the "host"/"loc" columns
# on every pandas version; `groupby(...).apply` prepends the group keys to the
# index from pandas 2.0 on and is deprecated when the applied function operates
# on the grouping columns.
first_seen = study.groupby(["host", "loc"])["time"].transform("min")
study = study.assign(time=(study["time"] - first_seen).dt.days)

In [None]:
# Join the study metadata with the abundance rows whose "value" exceeds 1e-5,
# matching on "sample" (pd.merge defaults to an inner join, so samples missing
# from either table are dropped).
df2 = pd.merge(study, df.query("value > 1e-5"), on="sample")

### Stability of BQ and RQ communities

In [None]:
# BQ community table: tab-separated, no header row, so columns are labelled
# 0, 1, ... Column 1 is used below as the set of org_ids in the community.
# NOTE(review): this community is plotted under the "Cooperative" legend label
# -- confirm that is the intended meaning of "BQ".
bq = pd.read_csv("../communities/top/bq_50.tsv", sep="\t", header=None)

In [None]:
# RQ community table: tab-separated, no header row, so columns are labelled
# 0, 1, ... Column 1 is used below as the set of org_ids in the community.
# NOTE(review): this community is plotted under the "Competitive" legend label
# -- confirm that is the intended meaning of "RQ".
rq = pd.read_csv("../communities/top/rq_50.tsv", sep="\t", header=None)

In [None]:
# One tuple of stability statistics per (host, loc, host_type) series; filled
# by the loop below and turned into the `stability` DataFrame afterwards.
data = []

def calc_stability(x):
    """Compute two stability statistics for the organisms in ``x``.

    Parameters
    ----------
    x : DataFrame
        Long-format rows with "org_id", "sample" and "value" columns.

    Returns
    -------
    (individual, group) : tuple of float
        ``individual`` is the coefficient of variation (std / mean across
        samples) of each organism, averaged over organisms. ``group`` is the
        mean pairwise cosine distance between the organisms' per-sample value
        vectors. ``group`` is NaN when fewer than two organisms are present
        (there is no pair to compare); both are NaN when ``x`` is empty.
    """
    if x.empty:
        # Nothing to pivot: e.g. none of the community's organisms occur here.
        return np.nan, np.nan

    # Rows are organisms, columns are samples; absent combinations count as 0.
    wide = x.pivot_table(index="org_id", columns="sample", values="value", fill_value=0)
    individual = (wide.std(axis=1) / wide.mean(axis=1)).mean()

    if len(wide) < 2:
        # pdist would return an empty array, whose mean is NaN with a warning.
        return individual, np.nan

    group = pdist(wide, "cosine").mean()
    return individual, group

# For every (host, loc, host_type) series, measure the stability of the BQ
# community, of the RQ community, and of a baseline obtained by averaging over
# randomly drawn communities.
N_RANDOM_COMMUNITIES = 10   # random communities averaged per series
RANDOM_COMMUNITY_SIZE = 50  # organisms drawn per random community

# Fixed seed so the random baseline is identical on every run.
rng = np.random.RandomState(0)

for (host, loc, host_type), dfk in df2.groupby(["host", "loc", "host_type"]):

    df_bq = dfk[dfk["org_id"].isin(bq[1])]
    individual_bq, group_bq = calc_stability(df_bq)

    df_rq = dfk[dfk["org_id"].isin(rq[1])]
    individual_rq, group_rq = calc_stability(df_rq)

    # Sorted so that, given the seed, the draw does not depend on set ordering.
    species = sorted(set(dfk["org_id"]))
    # A series can contain fewer organisms than requested; drawing more than
    # the population without replacement raises ValueError, so cap the size.
    community_size = min(RANDOM_COMMUNITY_SIZE, len(species))

    individual_all = []
    group_all = []
    for _ in range(N_RANDOM_COMMUNITIES):
        # Draw positions rather than values so org_ids keep their original type.
        picked = rng.choice(len(species), size=community_size, replace=False)
        community = [species[i] for i in picked]
        dfi = dfk[dfk["org_id"].isin(community)]
        individual_i, group_i = calc_stability(dfi)
        individual_all.append(individual_i)
        group_all.append(group_i)
    individual = np.mean(individual_all)
    group = np.mean(group_all)

    data.append((host, loc, host_type, individual, group, individual_bq, group_bq, individual_rq, group_rq))

stability = pd.DataFrame(data, columns=[
    "host", "loc", "host_type", "individual", "group", "individual_bq", "group_bq", "individual_rq", "group_rq"])

In [None]:
# Two-panel figure saved as supp_fig_6.png: density of the per-series stability
# statistics for random, BQ and RQ communities.
#   (a) mean coefficient of variation     (b) mean pairwise cosine distance
f, axs = plt.subplots(1, 2, figsize=(8, 3.5))

# (column suffix in `stability`, fill colour, legend label), in drawing order.
communities = [
    ("", "#cccccc", "Random"),
    ("_bq", "#ed7e17", "Cooperative"),
    ("_rq", "#1ba055", "Competitive"),
]
# (axes, column prefix in `stability`, upper x limit, x label, panel title)
panels = [
    (axs[0], "individual", 5, "Coefficient of variation", "(a)"),
    (axs[1], "group", 1, "Cosine distance", "(b)"),
]

for ax, metric, x_max, x_label, title in panels:
    for suffix, colour, _ in communities:
        # Series for which a statistic is undefined hold NaN; drop them so the
        # density is estimated from defined values only.
        sns.kdeplot(stability[metric + suffix].dropna(), shade=True, ax=ax, legend=False, color=colour)
    ax.set_xlim(0, x_max)
    ax.set_yticks([])
    ax.set_xlabel(x_label)
    ax.set_title(title)
    # Labels are matched to the curves in the order they were drawn.
    ax.legend([label for _, _, label in communities])

plt.tight_layout()
plt.savefig("../figures/supp_fig_6.png", dpi=300)