In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Execute ridges.py via IPython's %run. The `ridges(...)` plotting helper
# called in the figure cells is presumably defined there — confirm against ridges.py.
%run ridges.py

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from glob import glob
import os

In [None]:
# Use seaborn's "deep" colour palette for every plot drawn after this cell.
sns.set_palette(palette="deep")

## Load distances

In [None]:
# Network distance table; the first column of the TSV becomes the row index.
# jacc_distance() looks organisms up on both rows and columns of this frame.
dist_df = pd.read_csv(
    "../data/network_distances.tsv",
    index_col=0,
    sep="\t",
)

In [None]:
# Cophenetic distance table; the first column of the TSV becomes the row index.
# phylo_distance() looks organisms up on both rows and columns of this frame.
co_dist = pd.read_csv(
    "../data/cophenetic_matrix_all.tsv",
    index_col=0,
    sep="\t",
)

## Load communities

In [None]:
# Community types to load; each name is also the sub-directory of
# ../communities/ that holds that type's *.tsv files.
types = [
    "random",
    "bin_rnd_01",
    "bin_rnd_001",
]

In [None]:
orgs = {}
for commtype in types:
    orgs[commtype] = pd.concat(pd.read_csv(filename, sep='\t', header=None,
                                  names=["community", "organism"])
                               for filename in glob(f'../communities/{commtype}/*.tsv'))
    orgs[commtype]["type"] = commtype

In [None]:
orgs = pd.concat(orgs.values())

orgs["size"] = orgs["community"].apply(lambda x: int(x.split("_")[1]))

In [None]:
# Community sizes kept for the downstream aggregation; all other sizes are dropped.
sizes = [2,4,6,8,10,15,20,25,30,40]
# .copy() detaches the filtered rows from the unfiltered frame, so that adding
# columns to `orgs` later does not raise pandas' SettingWithCopyWarning.
orgs = orgs[orgs["size"].isin(sizes)].copy()

In [None]:
def _mean_pairwise_distance(matrix, members):
    """Return the mean of `matrix` over all unordered pairs of `members`.

    `members` is used to select both rows and columns of `matrix`; only the
    entries strictly above the diagonal of that sub-matrix are averaged, so
    each pair counts once and self-distances are excluded. With fewer than two
    members there are no pairs and NumPy's mean of the empty selection is NaN.
    """
    sub = matrix.loc[members, members].values
    upper = np.triu_indices(len(sub), k=1)
    return sub[upper].mean()


def jacc_distance(x):
    """Mean pairwise distance between the organisms in `x`, read from `dist_df`."""
    return _mean_pairwise_distance(dist_df, x)


def phylo_distance(x):
    """Mean pairwise distance between the organisms in `x`, read from `co_dist`."""
    return _mean_pairwise_distance(co_dist, x)

In [None]:
%%time
orgs["jacc"] = orgs["organism"]
orgs["phylo"] = orgs["organism"]
sim = orgs.groupby(["community", "size", "type"], as_index=False).agg({
    'jacc': jacc_distance,
    'phylo': phylo_distance
})

In [None]:
# Figure 2c: plot of the per-community mean network distance (`jacc` column).
# `ridges` is not defined in this notebook (presumably it comes from ridges.py);
# the tuple looks like an axis range and the string an axis label — confirm there.
ridges(sim, "jacc", (0.35,0.79), "network dissimilarity")
# Save the current figure at 300 dpi.
plt.savefig("../figures/fig_2c.png", dpi=300)

In [None]:
# Figure 2e: plot of the per-community mean phylogenetic distance (`phylo` column).
# `ridges` is not defined in this notebook (presumably it comes from ridges.py);
# the tuple looks like an axis range and the string an axis label — confirm there.
ridges(sim, "phylo", (2.3,4.3), "phylogenetic distance")
# Save the current figure at 300 dpi.
plt.savefig("../figures/fig_2e.png", dpi=300)