In [None]:
%matplotlib inline

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from glob import glob
from matplotlib.gridspec import GridSpec

In [None]:
# Use seaborn's "deep" palette as the default colour cycle for later plots.
sns.set_palette('deep')

### load basics

In [None]:
# Headerless TSV, so columns get integer labels; column 1 is later matched
# against the "organism" column in process() to flag organisms of interest.
bq = pd.read_csv("../communities/top/bq_50.tsv", sep="\t", header=None)

In [None]:
# Amino-acid table; its "name" column drives which per-amino-acid
# ancestrality files are loaded further down.
AA = pd.read_csv("../data/amino_acids.tsv", sep="\t")

### process data

In [None]:
def process(df, bq_organisms=None, min_species=3):
    """Attach per-genus statistics and select the rows of interest.

    For every (``genusName``, ``name``) group the mean of
    ``auxotrophy_ActualState`` and the number of rows are computed and merged
    back onto each row. A row is kept when ``auxotrophy_ActualState == 1``,
    its ``organism`` is listed in ``bq_organisms`` and its group has at least
    ``min_species`` rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``organism``, ``genusName``, ``name``,
        ``auxotrophy_ActualState`` and ``ace1_probablity``. The frame is not
        modified.
    bq_organisms : list-like, optional
        Organism identifiers to keep. Defaults to column ``1`` of the
        notebook-level ``bq`` table.
    min_species : int, optional
        Minimum number of rows a (genus, name) group needs (default 3).

    Returns
    -------
    pandas.DataFrame
        Columns ``species``, ``name``, ``T`` (group mean), ``n_species`` and
        ``P`` (the ``ace1_probablity`` column), sorted by ``species``.
    """
    if bq_organisms is None:
        bq_organisms = bq[1]

    # Work on a copy so the caller's frame does not silently gain columns.
    annotated = df.assign(
        # "Genus_species_strain..." -> "Genus species"
        species=df["organism"].map(lambda organism: " ".join(organism.split("_")[:2])),
        is_bq=df["organism"].isin(bq_organisms),
    )

    # String aggregators avoid the pandas >= 2.1 FutureWarning raised when
    # NumPy/builtin callables (np.mean, len) are passed to agg().
    group_stats = annotated.groupby(["genusName", "name"], as_index=False).agg(
        fraction=("auxotrophy_ActualState", "mean"),
        n_species=("organism", "size"),
    )
    merged = annotated.merge(group_stats, on=["genusName", "name"])

    keep = (
        (merged["auxotrophy_ActualState"] == 1)
        & merged["is_bq"]
        & (merged["n_species"] >= min_species)
    )
    selected = merged.loc[
        keep, ["species", "name", "fraction", "n_species", "ace1_probablity"]
    ].sort_values("species")
    return selected.rename(columns={"fraction": "T", "ace1_probablity": "P"})

In [None]:
# Load the per-amino-acid ancestrality table for every entry in AA["name"],
# run it through process() and keep results that contain more than one row.
data_all = []
missing = []
for name in AA["name"]:
    filename = f"../ancestrality/{name}_probabilities_mapped2treePerSpecies__final_5000sp_v3.tab"
    try:
        df = pd.read_csv(filename, sep='\t')
    except FileNotFoundError:
        # A missing table is skipped (best effort, as before); any other
        # failure, e.g. a malformed file, is allowed to surface.
        missing.append(name)
        continue
    data = process(df)
    if len(data) > 1:
        data_all.append(data)

if missing:
    print(f"No ancestrality table found for: {', '.join(map(str, missing))}")

if not data_all:
    raise ValueError("No ancestrality data could be loaded; check the ../ancestrality/ paths.")

final = pd.concat(data_all)

### plotting

In [None]:
# One heatmap panel per amino acid: rows are species, columns are T and P.
# Cells are annotated only where the value is below the threshold.
n_cols = 4
annot_below = 0.4
panels = list(final.groupby("name"))
# Keep the original 4x4 layout, but add rows instead of failing when there
# are more than 16 amino acids to draw.
n_rows = max(4, -(-len(panels) // n_cols))

fig = plt.figure(figsize=(16, 4 * n_rows))

for i, (aa, df) in enumerate(panels):
    df = df[["species", "T", "P"]].set_index("species")
    # Column-wise Series.map replaces the deprecated DataFrame.applymap and
    # works on both old and new pandas versions.
    annot = df.apply(
        lambda col: col.map(lambda x: f"{x:.1g}" if x < annot_below else '')
    )
    ax = fig.add_subplot(n_rows, n_cols, i + 1)
    sns.heatmap(df, cmap="PiYG_r", annot=annot, fmt='s',
                vmin=0, vmax=1, ax=ax)
    ax.set_title(aa)
    ax.set_ylabel('')
fig.tight_layout()
fig.savefig("../figures/supp_fig_7.png", dpi=300, bbox_inches='tight')