# Metric function(s)

In [53]:
import numpy as np
import pandas as pd
import itertools
import random
#from skbio import diversity
import toytree
import toyplot

#from source, simpd folder, simpd.py file, import Simpd class
from src.simpd.simpd import Simpd

In [5]:
#establishing a mock community
#species pool = 50, each community has 30 sp, no phylogenetic structure, 20 communities
mock = Simpd(ntips = 50)

#writing a tree and matrix to be used in R for comparison
#newick_str = mock.sp_tree.write()
#with open("testing2_tree.nwk", "w") as f:
#    f.write(newick_str)
#mock_mat = mock.simmat(sr = 30, pa = 0, nsites = 20, df = True, csv = "testing2")

mock_mat = pd.read_csv("testing2.csv")

In [6]:
#drawing the metacommunity tree
mock.sp_tree.draw();

In [7]:
#making distance matrix for the whole metacommunity
#mock.sp_tree.distance.get_tip_distance_matrix(df = True).to_csv("testing2_dm.csv") #for R testing
meta_dm = mock.sp_tree.distance.get_tip_distance_matrix(df = True)

## Trying (failing?) to prune tree to each community

In [8]:
#taking only present species
spp = mock_mat.apply(lambda row: row.index[row == 1].tolist(), axis=1)
spp
#query_list = spp[2]
#mock.sp_tree.get_nodes(*query_list)

#pruning the tree to present species for each site
#new_tree = toytree.mod.prune(mock.sp_tree, *query_list)
#new_tree.draw()


0     [r1, r2, r3, r4, r5, r8, r9, r12, r14, r15, r1...
1     [r1, r2, r9, r10, r12, r14, r16, r17, r18, r22...
2     [r0, r1, r3, r4, r6, r8, r9, r10, r11, r12, r1...
3     [r0, r1, r3, r4, r5, r6, r7, r9, r10, r11, r14...
4     [r0, r1, r2, r4, r5, r6, r9, r10, r12, r15, r1...
5     [r4, r5, r7, r8, r10, r12, r14, r15, r16, r17,...
6     [r2, r3, r4, r5, r7, r8, r10, r12, r14, r15, r...
7     [r1, r2, r3, r5, r7, r10, r11, r12, r13, r14, ...
8     [r2, r4, r5, r7, r9, r10, r11, r13, r14, r15, ...
9     [r0, r1, r3, r6, r7, r8, r11, r13, r15, r16, r...
10    [r1, r5, r6, r8, r9, r10, r12, r13, r14, r15, ...
11    [r1, r2, r3, r6, r7, r9, r10, r11, r12, r13, r...
12    [r0, r2, r3, r4, r5, r6, r11, r12, r14, r16, r...
13    [r2, r3, r6, r7, r8, r9, r11, r14, r15, r17, r...
14    [r0, r1, r2, r3, r5, r6, r7, r10, r11, r12, r1...
15    [r1, r2, r3, r5, r10, r11, r12, r13, r16, r17,...
16    [r1, r3, r4, r6, r7, r8, r11, r12, r14, r15, r...
17    [r0, r2, r3, r4, r7, r8, r10, r11, r12, r1

In [9]:
#creating a tree for each mock community
comm_trees = []
for i in range(len(spp)):
    query_list = spp[i]
    new_tree = toytree.mod.prune(mock.sp_tree, *query_list)
    comm_trees.append(new_tree)

comm_trees[0].draw();

In [143]:
# I think this is unnecessary, but I'm keeping it in case anyway

#getting a distance matrix for each tree
#comm_dists = []
#for t in comm_trees:
#    dm = t.distance.get_tip_distance_matrix()
#    comm_dists.append(dm)
    
#comm_dists[0]

## Visualizing the trees from each community

### Highlighting included tips on metacommunity tree

In [10]:
#create a mask for species in the community only
mask = []
for i in range(len(spp)):
    query_list = spp[i]
    comm_mask = mock.sp_tree.get_node_mask(*query_list)
    mask.append(comm_mask)
mask[0]
mock.sp_tree.draw(node_mask=mask[0], node_sizes=12);

Using a heatmap to visualize all communities from the same plotted tree

### Visualizing the pruned trees

In [11]:
# trees pruned in previous section. Can choose any community to visualize one at a time
comm_trees[1].draw();

### Visualizing all communities at once

In [46]:
# scale tree
ctree = mock.sp_tree.mod.edges_scale_to_root_height()

# get canvas and axes with tree plot
canvas, axes, mark = ctree.draw(
    width=900,
    height=900,
    tip_labels_align=True,
    tip_labels=False,
    tip_labels_style={"-toyplot-anchor-shift": "80px"},
    
);

# make species matrix a numpy array
mock_mat_np = mock_mat.to_numpy()

# add n rows of species matrix data
nrow = mock_mat_np.shape[0] #number of columns in the matrix
xoffset = 5
for row in range(nrow):
    
    # select the column of data
    data = mock_mat_np[row]
    
    # plot the data column
    axes.scatterplot(
        np.repeat(row, mock.sp_tree.ntips) + xoffset, 
        np.arange(mock.sp_tree.ntips),
        marker='s',
        size=10,
        color="deeppink",           
        opacity=0.1 + data[::-1] / data.max(),
        title=data,
    );

# stretch domain to fit long tip names
axes.x.domain.max = 40

In [54]:
# create a canvas
canvas = toyplot.Canvas(width=500, height=350);

# add tree 
axes = canvas.cartesian(bounds=(50, 150, 70, 250))
mock.sp_tree.draw(axes=axes, tip_labels=False, tip_labels_align=True)

# add matrix
table = canvas.table(
    rows=13,
    columns=5, 
    margin=0,
    bounds=(175, 250, 65, 255),
)

colormap = toyplot.color.brewer.map("BlueRed")

# apply a color to each cell in the table
for ridx in range(mock_mat_np.shape[0]):
    for cidx in range(mock_mat_np.shape[1]):
        cell = table.cells.cell[ridx, cidx]
        cell.style = {
            "fill": colormap.colors(mock_mat_np[ridx, cidx], 0, 100), 
        }

# style the gaps between cells
table.body.gaps.columns[:] = 3
table.body.gaps.rows[:] = 3 

# hide axes coordinates
axes.show = False

IndexError: index 5 is out of bounds for axis 1 with size 5

In [13]:
mock_mat_np[0]

array([0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1,
       0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0,
       1, 1, 0, 0, 1, 1])

In [14]:
mock_mat_np.shape[0]

20

## Calculating different metrics

### Faith's phylogenetic distance (PD)
The sum of all branch lengths in the community

In [15]:
#remove absent species from distance matrix
comm_dists = []
for comm in spp:
    comm_dm = meta_dm.loc[comm, comm]
    comm_dists.append(comm_dm)
comm_dists[0]

Unnamed: 0,r1,r2,r3,r4,r5,r8,r9,r12,r14,r15,...,r36,r37,r39,r40,r41,r42,r44,r45,r48,r49
r1,0.0,0.333333,1.0,1.0,1.0,1.333333,1.333333,1.666667,1.666667,1.666667,...,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0
r2,0.333333,0.0,1.0,1.0,1.0,1.333333,1.333333,1.666667,1.666667,1.666667,...,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0
r3,1.0,1.0,0.0,0.666667,0.666667,1.333333,1.333333,1.666667,1.666667,1.666667,...,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0
r4,1.0,1.0,0.666667,0.0,0.333333,1.333333,1.333333,1.666667,1.666667,1.666667,...,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0
r5,1.0,1.0,0.666667,0.333333,0.0,1.333333,1.333333,1.666667,1.666667,1.666667,...,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0
r8,1.333333,1.333333,1.333333,1.333333,1.333333,0.0,1.0,1.666667,1.666667,1.666667,...,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0
r9,1.333333,1.333333,1.333333,1.333333,1.333333,1.0,0.0,1.666667,1.666667,1.666667,...,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0
r12,1.666667,1.666667,1.666667,1.666667,1.666667,1.666667,1.666667,0.0,0.666667,1.0,...,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0
r14,1.666667,1.666667,1.666667,1.666667,1.666667,1.666667,1.666667,0.666667,0.0,1.0,...,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0
r15,1.666667,1.666667,1.666667,1.666667,1.666667,1.666667,1.666667,1.0,1.0,0.0,...,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0


In [16]:
# This isn't right--I summed all distances, but that double counts a lot of edges. Need to only count each edge once.

#sum all distances in each matrix
#pd = []
#for dm in comm_dists:
#    total = np.sum(np.triu(dm, k=1)) #takes the upper triangle only and excludes 0s
#    pd.append(total)

#pd
#check against picante in R

mock.sp_tree.get_node_data("dist").sum()

18.666666666666668

In [17]:
# For the pruned trees, sum distances
tree_pd = []
for tree in comm_trees:
    pd = tree.get_node_data("dist").sum()
    tree_pd.append(pd)
tree_pd

[14.0,
 13.333333333333334,
 13.833333333333332,
 13.833333333333332,
 14.0,
 13.333333333333332,
 13.666666666666666,
 13.999999999999998,
 13.833333333333332,
 13.833333333333332,
 14.0,
 13.833333333333332,
 14.166666666666668,
 14.166666666666666,
 13.666666666666664,
 13.5,
 13.833333333333332,
 13.833333333333332,
 13.833333333333332,
 13.666666666666666]

This works! Gets the same result as R picante!

### Mean pairwise distance (MPD)
The mean distance between any pair of species in the community

In [18]:
pairs = list(itertools.combinations(spp[0], 2)) #doing this for one community
pair_dists = []
for p in range(len(pairs)):
    query_list = pairs[p]
    dist = mock.sp_tree.distance.get_node_distance(*query_list)
    pair_dists.append(dist)
sum(pair_dists)/len(pair_dists)

1.7065134099616857

It worked! Now to do it for all of the communities...

In [19]:
tree_mpd = []
for i in range(len(spp)):
    pairs = list(itertools.combinations(spp[i], 2))
    pair_dists = []
    for p in range(len(pairs)):
        query_list = pairs[p]
        dist = mock.sp_tree.distance.get_node_distance(*query_list)
        pair_dists.append(dist)
    tree_mpd.append(sum(pair_dists)/len(pair_dists))
tree_mpd

[1.7065134099616857,
 1.6919540229885057,
 1.703448275862069,
 1.6950191570881223,
 1.710344827586207,
 1.6980842911877394,
 1.7057471264367816,
 1.7149425287356321,
 1.710344827586207,
 1.7118773946360153,
 1.7080459770114942,
 1.70727969348659,
 1.710344827586207,
 1.7126436781609196,
 1.7019157088122603,
 1.6996168582375477,
 1.710344827586207,
 1.7065134099616857,
 1.7019157088122603,
 1.700383141762452]

### Mean nearest taxon distance (MNTD)
The mean distance from one species to its nearest neighbor for all species in the community. Same as mean nearest neighbor distance (mnnd)

In [34]:
dm = comm_dists[0]
for row in range(dm.shape[0]):
    sp_dist = list(dm.iloc[row])
    del sp_dist[row]
    min_dist = min(sp_dist)
    print(min_dist)

0.3333333333333333
0.3333333333333333
0.6666666666666666
0.3333333333333333
0.3333333333333333
1.0
1.0
0.6666666666666666
0.6666666666666666
0.6666666666666666
0.6666666666666666
1.0
1.0
1.0
0.6666666666666666
0.3333333333333333
0.3333333333333333
0.6666666666666666
0.6666666666666666
0.6666666666666666
0.6666666666666666
0.6666666666666666
0.6666666666666666
0.6666666666666666
0.3333333333333333
0.3333333333333333
0.3333333333333333
0.3333333333333333
0.3333333333333333
0.3333333333333333


In [35]:
mntd = []
for dm in comm_dists: #for community in dms of communities list
    nt = [] #empty list of nearest taxa
    for row in range(dm.shape[0]): #for each species (row)
        sp_dist = list(dm.iloc[row]) #select species row
        del sp_dist[row] #exclude same-species distance
        nt.append(min(sp_dist)) #append minimum to nearest list
    mntd.append(sum(nt)/len(nt)) #append average to mntd list
mntd

[0.5888888888888888,
 0.5222222222222223,
 0.6,
 0.5777777777777777,
 0.5888888888888888,
 0.5111111111111111,
 0.5777777777777777,
 0.5777777777777777,
 0.5666666666666667,
 0.5666666666666667,
 0.6,
 0.5666666666666667,
 0.611111111111111,
 0.6,
 0.5555555555555555,
 0.5333333333333333,
 0.5666666666666667,
 0.5888888888888888,
 0.5888888888888888,
 0.5777777777777777]

Same results as picante in R!