# Distance metric comparison
> Do the pairwise distances between mixture embeddings correlate with things like beta-diversity?

In [34]:
%load_ext autoreload
%autoreload 2

import sys

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Clustering must allow custom distance metric
from scipy.spatial.distance import pdist, squareform
from sklearn.metrics import pairwise_distances

from skbio import TreeNode
from skbio.diversity import beta_diversity
from skbio.diversity.beta import unweighted_unifrac

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [31]:
# Load the reference tree and the OTU table (UniFrac distances are computed in a later cell)

# Read the 97% OTU tree from disk. Every non-root node that has no branch
# length is reported and then given a length of 0.0.
# NOTE(review): presumably needed so UniFrac's tree validation accepts the
# tree -- confirm against the scikit-bio docs.
tree = TreeNode.read(
    "/home/phil/phylosig/greengenes/data/gg_13_5_otus/trees/97_otus.tree"
)
for node in tree.postorder(include_self=False):
    if node.length is not None:
        continue
    # Print before patching so the output shows the original (None) length
    print(node.name, node.length)
    node.length = 0.0

# Build the OTU table in a single chain:
#   1. read the tab-separated file, parsing the first column as strings
#   2. use that first column as the index
#   3. cast every remaining value to float
#   4. transpose, so the first column's values end up as the column labels
otu_table = (
    pd.read_table(
        "/home/phil/mixture_embeddings/data/interim/mlrepo_clean/classification/bacteremia/otus.txt",
        dtype={0: str},
    )
    .pipe(lambda frame: frame.set_index(frame.columns[0]))
    .astype(float)
    .T
)

k__Bacteria None


In [33]:
def unifrac(u, v):
    """Unweighted UniFrac distance between two count vectors.

    Thin two-argument wrapper around ``skbio``'s ``unweighted_unifrac`` so it
    can be handed to ``scipy``'s ``pdist`` as a custom metric.

    Parameters
    ----------
    u, v : array-like
        Count vectors, forwarded unchanged. Their entries are interpreted
        against the column labels of the notebook-level ``otu_table``.

    Returns
    -------
    The value returned by ``unweighted_unifrac`` for this pair.

    Notes
    -----
    Reads the notebook-level names ``otu_table`` and ``tree`` at call time,
    so both must already be defined.
    """
    otu_ids = otu_table.columns
    return unweighted_unifrac(u, v, otu_ids, tree)

# pdist evaluates `unifrac` on every pair of rows of otu_table and returns the
# distances in condensed (flat) form; squareform expands that into the full
# symmetric matrix.
condensed_unifracs = pdist(otu_table, unifrac)
all_unifracs = squareform(condensed_unifracs)
print(all_unifracs)

[[0.         0.5468897  0.57393401 0.4946109  0.51944234 0.60677181
  0.55273611 0.55893976 0.52665038 0.65316115 0.61607895 0.74032413
  0.48767644 0.62295505 0.63185138 0.5781006  0.62126018 0.60492677
  0.55540886 0.76826228 0.55259941 0.55017612 0.58764794 0.68161677
  0.61003076 0.5628998  0.56478321 0.56338648]
 [0.5468897  0.         0.56139553 0.54911812 0.60265365 0.55940653
  0.54448661 0.54786728 0.52790659 0.68016372 0.5740244  0.79245432
  0.59299641 0.63001427 0.62589823 0.61624826 0.53427045 0.61489742
  0.57726051 0.81778204 0.53746165 0.59534411 0.60665317 0.74752454
  0.56773959 0.57296847 0.62985304 0.59933078]
 [0.57393401 0.56139553 0.         0.57674767 0.5992666  0.48977335
  0.52049981 0.5769273  0.52494525 0.6824386  0.50961724 0.78062672
  0.54004895 0.60343422 0.54276858 0.59998803 0.58053958 0.60860239
  0.53362896 0.78931631 0.53442444 0.53613131 0.66090275 0.7732384
  0.49465182 0.59349194 0.63372079 0.62626484]
 [0.4946109  0.54911812 0.57674767 0.       