In [12]:
import zipfile
import pandas as pd
import numpy as np
import io
import random
import os
import scipy as sp
from scipy.stats import spearmanr, pearsonr
from scipy.cluster.hierarchy import linkage, dendrogram
from scipy.spatial.distance import squareform, pdist
from itertools import combinations
from plotly.offline import init_notebook_mode, iplot
import plotly.figure_factory as ff
init_notebook_mode(connected=True)

This code produces a dendrogram that clusters beta diversity measures by the correlation between their randomly sampled pairwise distance values. It must be run in a directory containing .qza DistanceMatrix artifacts, e.g., one produced by running the createEBDMatrices.sh script.

In [16]:
def _list_distance_artifacts(filters):
    """Return the sorted names of the files in the working directory to compare.

    A file is kept when its name contains ".qza", does not contain "pcoa", and
    does not contain any of the substrings in ``filters``.
    """
    # os.listdir() order is arbitrary; sorting keeps the row order stable.
    return sorted(
        name for name in os.listdir()
        if ".qza" in name
        and "pcoa" not in name
        and not any(pattern in name for pattern in filters)
    )


def _read_distance_matrix(filename):
    """Load the "distance-matrix" member of a .qza (zip) archive as a DataFrame."""
    # The context manager closes the archive even if reading fails.
    with zipfile.ZipFile(filename) as archive:
        members = [name for name in archive.namelist() if "distance-matrix" in name]
        if not members:
            raise ValueError(
                "{} does not contain a distance-matrix file".format(filename))
        raw = archive.read(members[0])
    return pd.read_csv(io.StringIO(raw.decode()), sep="\t", index_col=0)


def _pearson_distance(u, v):
    """Return 1 - Pearson r of ``u`` and ``v``; undefined correlations count as r = 0."""
    # A constant vector has zero variance, so r is undefined. Short-circuiting
    # gives the same result as the non-finite fallback below (distance 1)
    # without triggering the division-by-zero warning inside pearsonr.
    if np.ptp(u) == 0 or np.ptp(v) == 0:
        return 1.0
    corr = pearsonr(u, v)[0]
    if not np.isfinite(corr):
        corr = 0
    return 1 - corr


def plot_dm_dendrogram(filters=(), nreps=2500, height=600, width=800, seed=None):
    """Plot a dendrogram of distance-matrix artifacts clustered by correlation.

    Every selected artifact in the current working directory is read, the same
    ``nreps`` randomly chosen off-diagonal cells are extracted from each
    matrix, and the artifacts are clustered (average linkage) on
    1 - Pearson correlation of those sampled values. The figure is displayed
    with ``iplot``; nothing is returned.

    Parameters
    ----------
    filters : iterable of str, or str
        Substrings used to EXCLUDE artifacts: a file is skipped when its name
        contains any of them. A bare string is treated as a single substring.
    nreps : int
        Number of random (row, column) cells sampled from each matrix.
    height, width : int
        Figure size in pixels.
    seed : int or None
        When given, the cells are drawn from a private ``random.Random(seed)``
        so the plot is reproducible. When None, the global ``random`` state is
        used, as before.

    Raises
    ------
    ValueError
        If fewer than two artifacts are selected, if a matrix has fewer than
        two samples, if the matrices differ in shape, or if an archive holds
        no distance-matrix file.

    Notes
    -----
    Cells are compared by position, not by sample label.
    NOTE(review): this assumes every matrix lists the same samples in the same
    order - confirm against how the artifacts were generated.
    """
    if isinstance(filters, str):
        # Iterating a bare string would test single characters as filters.
        filters = [filters]

    arts = _list_distance_artifacts(filters)
    narts = len(arts)
    if narts < 2:
        # Clustering needs at least two rows.
        raise ValueError(
            "need at least two distance matrix artifacts, found {}".format(narts))

    rng = random if seed is None else random.Random(seed)

    dists = np.empty((narts, nreps))
    expected_shape = None
    rows = cols = None
    for i, filename in enumerate(arts):
        mat = _read_distance_matrix(filename)
        if expected_shape is None:
            expected_shape = mat.shape
            # Indices must range over the samples in the matrix, not over the
            # number of artifact files.
            n_samples = min(mat.shape)
            if n_samples < 2:
                raise ValueError(
                    "{} has fewer than two samples".format(filename))
            pairs = []
            while len(pairs) < nreps:
                x = rng.randrange(n_samples)
                y = rng.randrange(n_samples)
                if x != y:  # skip the diagonal
                    pairs.append((x, y))
            # The explicit int dtype keeps the index arrays valid when empty.
            pair_idx = np.array(pairs, dtype=int).reshape(-1, 2)
            rows, cols = pair_idx[:, 0], pair_idx[:, 1]
        elif mat.shape != expected_shape:
            raise ValueError(
                "{} has shape {}, expected {}; positional sampling requires "
                "identically shaped matrices".format(
                    filename, mat.shape, expected_shape))
        # Pull all sampled cells for this artifact in one indexing operation.
        dists[i, :] = mat.values[rows, cols]

    labels = [x.split(".")[0] for x in arts]
    dendro = ff.create_dendrogram(dists, distfun=lambda x: pdist(x, _pearson_distance),
                                  linkagefun=lambda x: linkage(x, 'average'),
                                  labels=labels, orientation='right')

    dendro['layout'].update({'width': width, 'height': height})
    # Extra right margin leaves room for the leaf labels drawn on that side.
    dendro['layout']['margin']['r'] = 250
    dendro['layout']['yaxis']['tickvals'] = np.arange(5, dists.shape[0] * 10 + 5, 10)
    dendro['layout']['yaxis']['side'] = 'right'
    iplot(dendro)

In [17]:
# Plot all distance measures: no filters are passed, so every ".qza" file in the
# working directory whose name does not contain "pcoa" is included.
plot_dm_dendrogram(height=800)


invalid value encountered in double_scalars



In [23]:
# Filters are exclusions: leave out every artifact whose filename contains "unweighted".
plot_dm_dendrogram(filters=["unweighted"],height=600)


invalid value encountered in double_scalars



In [24]:
# Filters are exclusions: leave out every artifact whose filename contains "_weighted".
# The leading underscore keeps the pattern from also matching "unweighted".
plot_dm_dendrogram(filters=["_weighted"],height=600)

In [25]:
# Filters are exclusions: leave out every artifact whose filename contains "nonphylogenetic".
plot_dm_dendrogram(filters=["nonphylogenetic"],height=600)

In [26]:
# Filters are exclusions: leave out every artifact whose filename contains "_phylogenetic".
# The leading underscore keeps the pattern from also matching "nonphylogenetic".
plot_dm_dendrogram(filters=["_phylogenetic"],height=600)


invalid value encountered in double_scalars



In [27]:
# Leave out artifacts whose filename contains "unweighted" OR "nonphylogenetic"
# (a file is skipped as soon as any one filter substring matches).
plot_dm_dendrogram(filters=["unweighted", "nonphylogenetic"],height=400)

In [28]:
# Leave out artifacts whose filename contains "_weighted" OR "nonphylogenetic"
# (a file is skipped as soon as any one filter substring matches).
plot_dm_dendrogram(filters=["_weighted", "nonphylogenetic"],height=400)

In [29]:
# Leave out artifacts whose filename contains "unweighted" OR "_phylogenetic"
# (a file is skipped as soon as any one filter substring matches).
plot_dm_dendrogram(filters=["unweighted", "_phylogenetic"],height=400)


invalid value encountered in double_scalars



In [30]:
# Leave out artifacts whose filename contains "_weighted" OR "_phylogenetic"
# (a file is skipped as soon as any one filter substring matches).
plot_dm_dendrogram(filters=["_weighted", "_phylogenetic"],height=400)