## nb1 Distance module testing

Toytree distance module in development

In [1]:
import toytree
import ipcoal
import pandas as pd
import numpy as np

### Workflow

1. Input: toytrees
2. User options:
    - Type of distance metric: RF (Robinson-Foulds) vs. quartets
    - `sampmethod` = "consensus" vs. "pairwise" vs. "random"
    - `consensustree` = `None` by default (a consensus tree is generated for the user) OR a user-provided consensus tree
3. Output: pandas DataFrame listing which trees were compared and the corresponding distance metric value

In [None]:
# Goal: sketch of the intended functional API (not implemented; kept commented out).
# The wrapper would build the tool object from two trees, run it, and return its `.data`.

# workflow goal
#def robinson_foulds(tree1, tree2, *args):
   
#    tool = RobinsonFoulds(tree1, tree2, *args)
#    tool.run()
#    return tool.data

In [4]:
# Build two seeded 5-tip unit trees; the fixed seeds keep the example reproducible.
tree1, tree2 = (
    toytree.core.rtree.unittree(5, seed=seed) for seed in (123, 323)
)

# Collect the trees in a list; this list is the input to the distance tools below.
trees_list = [tree1, tree2]

# Wrap the list in a MultiTree and draw it (trailing ';' suppresses the repr output).
TREES = toytree.core.multitree.MultiTree(trees_list)
TREES.draw(ts='p');

In [4]:
# Show the Newick string of tree1 so its topology can be compared with tree2.
tree1.newick

'((r4:0.666667,(r3:0.333333,r2:0.333333)0:0.333333)0:0.333333,(r1:0.666667,r0:0.666667)0:0.333333);'

In [5]:
# Show the Newick string of tree2 so its topology can be compared with tree1.
tree2.newick

'((r4:0.666667,r3:0.666667)0:0.333333,(r2:0.666667,(r1:0.333333,r0:0.333333)0:0.333333)0:0.333333);'

### Quartets
Implemented as the `quartets` class in `treedist.py`.

In [3]:
# Quartet comparison of the trees in `trees_list` using the "pairwise" sampling method.
# After `run()`, `data` is displayed as the results table
# (columns in the saved output: trees, Quartet_intersection).
quart = toytree.distance.treedist.quartets(trees_list, "pairwise")
quart.run()
quart.data

Unnamed: 0,trees,Quartet_intersection
0,"0, 1",0.6


In [4]:
# Quartet comparison using the "random" sampling method.
# NOTE(review): presumably this compares randomly drawn tree pairs (the saved output
# lists the pair as "1, 0") -- confirm against treedist.py.
quart = toytree.distance.treedist.quartets(trees_list, "random")
quart.run()
quart.data

Unnamed: 0,trees,Quartet_intersection
0,"1, 0",0.6


In [5]:
# Quartet comparison using the "consensus" sampling method: the saved output has one
# row per input tree, each compared against "consensus". No consensus tree is passed
# here; per the workflow notes, one is generated when none is provided.
quart = toytree.distance.treedist.quartets(trees_list, "consensus")
quart.run()
quart.data

Unnamed: 0,trees,Quartet_intersection
0,"0, consensus",1.0
1,"1, consensus",0.6


### RF
Implemented as the `robinson_foulds` class in `treedist.py`.

In [5]:
# Robinson-Foulds comparison of the trees in `trees_list` using the "pairwise" method.
# After `run()`, `data` is displayed as the results table
# (columns in the saved output: trees, RF, max_RF, normalized_rf).
# NOTE(review): the saved output of this cell also contains verbose debug prints
# (ASCII subtrees and bit vectors) and is truncated; the later RF cells do not show
# them, so this output looks stale -- re-run the cell to refresh it.
rf = toytree.distance.treedist.robinson_foulds(trees_list, "pairwise")
rf.run()
rf.data


      /-r4
   /-|
  |  |   /-r3
  |   \-|
--|      \-r2
  |
  |   /-r1
   \-|
      \-r0
[0. 0. 0. 0. 1.]
[0. 0. 0. 1. 1.]
[0. 0. 1. 1. 1.]
[0. 1. 1. 1. 1.]
[1. 1. 1. 1. 1.]
bits skip [1. 1. 1. 1. 1.]

   /-r4
--|
  |   /-r3
   \-|
      \-r2
[0. 0. 0. 0. 1.]
[0. 0. 0. 1. 1.]
[0. 0. 1. 1. 1.]

--r4
[0. 0. 0. 0. 1.]
bits skip [0. 0. 0. 0. 1.]

   /-r3
--|
   \-r2
[0. 0. 0. 1. 0.]
[0. 0. 1. 1. 0.]

--r3
[0. 0. 0. 1. 0.]
bits skip [0. 0. 0. 1. 0.]

--r2
[0. 0. 1. 0. 0.]
bits skip [0. 0. 1. 0. 0.]

   /-r1
--|
   \-r0
[0. 1. 0. 0. 0.]
[1. 1. 0. 0. 0.]

--r1
[0. 1. 0. 0. 0.]
bits skip [0. 1. 0. 0. 0.]

--r0
[1. 0. 0. 0. 0.]
bits skip [1. 0. 0. 0. 0.]

      /-r4
   /-|
  |   \-r3
--|
  |   /-r2
   \-|
     |   /-r1
      \-|
         \-r0
[0. 0. 0. 0. 1.]
[0. 0. 0. 1. 1.]
[0. 0. 1. 1. 1.]
[0. 1. 1. 1. 1.]
[1. 1. 1. 1. 1.]
bits skip [1. 1. 1. 1. 1.]

   /-r4
--|
   \-r3
[0. 0. 0. 0. 1.]
[0. 0. 0. 1. 1.]

--r4
[0. 0. 0. 0. 1.]
bits skip [0. 0. 0. 0. 1.]

--r3
[0. 0. 0. 1. 0.]
bits skip [0. 0

Unnamed: 0,trees,RF,max_RF,normalized_rf
0,"0, 1",4,6.0,0.666667


In [16]:
# Robinson-Foulds comparison using the "random" sampling method.
# NOTE(review): presumably this compares randomly drawn tree pairs (the saved output
# lists the pair as "1, 0") -- confirm against treedist.py.
rf = toytree.distance.treedist.robinson_foulds(trees_list, "random")
rf.run()
rf.data

Unnamed: 0,trees,RF,max_RF,normalized_rf
0,"1, 0",4,6.0,0.666667


In [17]:
# Robinson-Foulds comparison using the "consensus" sampling method: the saved output
# has one row per input tree, each compared against "consensus". No consensus tree is
# passed here; per the workflow notes, one is generated when none is provided.
rf = toytree.distance.treedist.robinson_foulds(trees_list, "consensus")
rf.run()
rf.data

Unnamed: 0,trees,RF,max_RF,normalized_rf
0,"0, consensus",5,5.0,1.0
1,"1, consensus",3,5.0,0.6


## ---

### TO DO:
1. Quartets function - DONE
2. Rewrite RF function - DONE
3. Suggested tweaks: 
  - Raise an error when only one tree is given
  - Use `*args` instead of `sampmethod`, `consensustree`, etc.
  - Provide access to the backup `OldRobinsonFoulds` option (?)
4. Python class inheritance (decorator @ with class object)