# Metric function(s)

In [1]:
import numpy as np
import pandas as pd
import random
from skbio import diversity
import toytree

from sys import path
path.append('../simpd')
from simpd import Simpd

In [2]:
# Set up the mock metacommunity: a simulator built around a 50-species pool.
# Intended design: 20 communities of 30 species each, no phylogenetic structure.
mock = Simpd(ntips=50)

# To regenerate the tree and matrix used for the comparison in R, uncomment:
#newick_str = mock.sp_tree.write()
#with open("testing2_tree.nwk", "w") as f:
#    f.write(newick_str)
#mock_mat = mock.simmat(sr = 30, pa = 0, nsites = 20, df = True, csv = "testing2")

# Load the previously saved community matrix instead of simulating a new one.
# NOTE(review): `mock` is rebuilt on every run while this matrix comes from disk --
# presumably both must describe the same tree; confirm that Simpd is deterministic
# here, or load the saved tree alongside the matrix.
mock_mat = pd.read_csv("testing2.csv")

In [3]:
# Draw the metacommunity species tree held by `mock`.
# The trailing `;` suppresses the text repr of the value returned by draw().
mock.sp_tree.draw();

In [4]:
# Tip-to-tip distance matrix for the whole metacommunity tree (df=True requests
# a DataFrame so it can later be subset by species label).
# To export the same matrix for testing in R, uncomment:
#mock.sp_tree.distance.get_tip_distance_matrix(df = True).to_csv("testing2_dm.csv")
meta_dm = mock.sp_tree.distance.get_tip_distance_matrix(
    df=True,
)

## Trying (failing?) to prune tree to each community

In [5]:
def _present_species(site_row):
    """Return the column labels whose value in this row equals 1."""
    return site_row.index[site_row == 1].tolist()


# One list of present species per row of the community matrix
spp = mock_mat.apply(_present_species, axis=1)

# Scratch code for pruning the tree to a single site, kept for reference:
#query_list = spp[2]
#mock.sp_tree.get_nodes(*query_list)
#new_tree = toytree.mod.prune(mock.sp_tree, *query_list)
#new_tree.draw()

spp


0     [r1, r2, r3, r4, r5, r8, r9, r12, r14, r15, r1...
1     [r1, r2, r9, r10, r12, r14, r16, r17, r18, r22...
2     [r0, r1, r3, r4, r6, r8, r9, r10, r11, r12, r1...
3     [r0, r1, r3, r4, r5, r6, r7, r9, r10, r11, r14...
4     [r0, r1, r2, r4, r5, r6, r9, r10, r12, r15, r1...
5     [r4, r5, r7, r8, r10, r12, r14, r15, r16, r17,...
6     [r2, r3, r4, r5, r7, r8, r10, r12, r14, r15, r...
7     [r1, r2, r3, r5, r7, r10, r11, r12, r13, r14, ...
8     [r2, r4, r5, r7, r9, r10, r11, r13, r14, r15, ...
9     [r0, r1, r3, r6, r7, r8, r11, r13, r15, r16, r...
10    [r1, r5, r6, r8, r9, r10, r12, r13, r14, r15, ...
11    [r1, r2, r3, r6, r7, r9, r10, r11, r12, r13, r...
12    [r0, r2, r3, r4, r5, r6, r11, r12, r14, r16, r...
13    [r2, r3, r6, r7, r8, r9, r11, r14, r15, r17, r...
14    [r0, r1, r2, r3, r5, r6, r7, r10, r11, r12, r1...
15    [r1, r2, r3, r5, r10, r11, r12, r13, r16, r17,...
16    [r1, r3, r4, r6, r7, r8, r11, r12, r14, r15, r...
17    [r0, r2, r3, r4, r7, r8, r10, r11, r12, r1

In [6]:
# Build one pruned tree per mock community: the metacommunity tree restricted
# to the species present in that community.
# Iterate over the Series values directly instead of looking entries up with
# spp[i]: an integer key on a Series is a *label* lookup, so the index-based
# loop only worked because the index happens to be the default 0..n-1.
comm_trees = [
    toytree.mod.prune(mock.sp_tree, *site_species)
    for site_species in spp
]

# Draw the first community's tree as a quick visual check
comm_trees[0].draw();

In [143]:
# I think this is unnecessary, but I'm keeping it in case anyway

#getting a distance matrix for each tree
#comm_dists = []
#for t in comm_trees:
#    dm = t.distance.get_tip_distance_matrix()
#    comm_dists.append(dm)
    
#comm_dists[0]

## Visualizing the trees from each community

### Highlighting included tips on metacommunity tree

In [9]:
# Build one node mask per community, selecting that community's species on the
# full metacommunity tree.
# Iterate over the Series values directly instead of looking entries up with
# spp[i]: an integer key on a Series is a *label* lookup, so the index-based
# loop only worked because the index happens to be the default 0..n-1.
mask = [mock.sp_tree.get_node_mask(*site_species) for site_species in spp]

# Example usage: inspect the first mask, or draw the tree with it applied
#mask[0]
#mock.sp_tree.draw(node_mask=mask[0], node_sizes=12);

In [None]:
# Simulate 50 random coalescent trees (10 tips each) and bundle them into a MultiTree
random_trees = [toytree.rtree.coaltree(10) for _ in range(50)]
mtree = toytree.mtree(random_trees)

In [None]:
#create all of the trees
viz_trees = []
for i in range(len(spp)):
    # TODO: draw community i (e.g. the metacommunity tree with mask[i] applied)
    # and append the drawing to viz_trees.
    pass  # placeholder: a `for` header with no body is a syntax error

#create a grid of all of the community trees

### Visualizing the pruned trees

## Calculating different metrics

### Faith's phylogenetic diversity (PD)
The sum of the lengths of all branches in the tree connecting the species present in the community, with each branch counted once

In [144]:
# Subset the metacommunity distance matrix to each community's present species
# (same labels on rows and columns, so absent species are dropped from both axes)
comm_dists = [meta_dm.loc[present, present] for present in spp]

In [116]:
# Sum of all pairwise tip-to-tip distances within each community.
# NOTE: this is NOT Faith's PD -- summing pairwise distances double counts a lot
# of edges; PD needs each edge counted only once.
# The result is stored in `pairwise_dist_sums` rather than `pd`: rebinding `pd`
# would shadow the pandas alias and break any later `pd.` call (for example
# re-running the `pd.read_csv` cell).
pairwise_dist_sums = [
    np.sum(np.triu(dm, k=1))  # strict upper triangle: each pair once, diagonal excluded
    for dm in comm_dists
]

pairwise_dist_sums
#check against picante in R

[742.3333333333333,
 736.0,
 740.9999999999999,
 737.3333333333333,
 743.9999999999999,
 738.6666666666666,
 742.0,
 746.0,
 744.0,
 744.6666666666665,
 742.9999999999998,
 742.6666666666666,
 744.0,
 745.0,
 740.3333333333333,
 739.3333333333333,
 744.0,
 742.3333333333333,
 740.3333333333333,
 739.6666666666665]

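These values (roughly 736–746) are very different from the picante results, which are between 13 and 14. The sum of pairwise distances counts shared branches many times, so it is not Faith's PD.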

In [None]:
# TODO: compute Faith's PD here by summing the length of each edge in a community's tree exactly once

### Mean pairwise distance (MPD)
The mean phylogenetic distance between all pairs of distinct species in the community

### Mean nearest neighbor distance (MNND)
The mean, over all species in the community, of the phylogenetic distance from each species to its nearest neighbor (its closest relative in the community)