## nb1 Distance module testing

Toytree distance module in development

In [1]:
import toytree
import pandas as pd
import numpy as np

In [2]:
t0 = toytree.tree('((A, B), ((C, (D, E)), (F, (G, (H, I)))));')
t1 = toytree.tree('((A, B), ((C, D, (E, I)), (F, (G, H))));')
toytree.mtree([t0, t1]).draw(height=220);

In [3]:
a = [(1, 2), (3, 4, 5)]

In [4]:
data = pd.Series(
    index=["mean", "std", "se", "n"],
    name="tree_distance",
    dtype=float,
)

In [5]:
import itertools

In [6]:
a = set([1, 2, 3])
b = set([3, 4, 5])

In [7]:
len(t0.treenode.children[0].get_leaves())

2

In [8]:
t0.unroot().treenode.children

(<Node(idx=0, name='A')>, <Node(idx=1, name='B')>, <Node(idx=14)>)

In [9]:
t0.treenode.children

(<Node(idx=9)>, <Node(idx=15)>)

In [10]:
# mtrees = [toytree.rtree.unittree(10) for i in range(10)]
# min([toytree.distance.get_treedist_rfg_msi(t0, i, True) for i in mtrees])

In [11]:
def randomize_names(tree):
    tree

In [15]:
d = t0.enum.iter_bipartitions()
list(d)

[({'A', 'B'}, {'C', 'D', 'E', 'F', 'G', 'H', 'I'}),
 ({'D', 'E'}, {'A', 'B', 'C', 'F', 'G', 'H', 'I'}),
 ({'C', 'D', 'E'}, {'A', 'B', 'F', 'G', 'H', 'I'}),
 ({'H', 'I'}, {'A', 'B', 'C', 'D', 'E', 'F', 'G'}),
 ({'G', 'H', 'I'}, {'A', 'B', 'C', 'D', 'E', 'F'}),
 ({'F', 'G', 'H', 'I'}, {'A', 'B', 'C', 'D', 'E'})]

<generator object Node._iter_leaves at 0x7f76f70bce40>

In [18]:
names = t0.get_tip_labels()

def test(t0, names):
    np.random.shuffle(names)
    for i, node in enumerate(t0.treenode.iter_leaves()):
        node.name = names[i]
    #[node.name = i for (node, i) in zip(t0.treenode._iter_leaves(), names)]
    return t0.iter_bipartitions()

In [20]:
list(test(t0, names))

[({'E', 'I'}, {'A', 'B', 'C', 'D', 'F', 'G', 'H'}),
 ({'B', 'G'}, {'A', 'C', 'D', 'E', 'F', 'H', 'I'}),
 ({'B', 'D', 'G'}, {'A', 'C', 'E', 'F', 'H', 'I'}),
 ({'A', 'H'}, {'B', 'C', 'D', 'E', 'F', 'G', 'I'}),
 ({'A', 'C', 'H'}, {'B', 'D', 'E', 'F', 'G', 'I'}),
 ({'A', 'C', 'F', 'H'}, {'B', 'D', 'E', 'G', 'I'})]

In [97]:
%%timeit
list(test(t0, names))

150 µs ± 1.28 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [178]:
def _random_bipartitions(btable, names):
    np.random.shuffle(names)
    for idx in range(btable.shape[0]):
        mask0 = btable[idx].astype(bool)
        mask1 = np.invert(mask0)
        yield tuple(names[mask0]), tuple(names[mask1])

In [179]:
bt = t0._get_bipartitions_table(exclude_singleton_splits=True).values
nn = np.array(t0.get_tip_labels())

In [184]:
%%timeit
list(_random_bipartitions(bt, nn))

27.3 µs ± 1.35 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


{Node(2)}

In [6]:
    >>> t0 = toytree.rtree.unittree(ntips=10, seed=123)
    >>> t1 = toytree.rtree.unittree(ntips=10, seed=321)
#     >>> t0.distance.get_treedist_rf(t1)

In [12]:
    >>> t0 = toytree.rtree.unittree(ntips=10, seed=123)
    >>> t1 = toytree.rtree.unittree(ntips=10, seed=321)
    >>> print(t0.distance.get_treedist_rf(t1, normalize=False))
    >>> print(t0.distance.get_treedist_rf(t1, normalize=True))

8
0.5714285714285714


In [8]:
    >>> t0.distance.get_treedist_rf(t1, normalize=False)

8

In [11]:
    >>> t0.distance.get_treedist_rf(t1, normalize=sum)    

0.5714285714285714

In [None]:
    >>> t0 = toytree.rtree.unittree(ntips=10, seed=123)
    >>> t1 = toytree.rtree.unittree(ntips=10, seed=321)
    >>> t0.distance.get_treedist_rf(t1)
    >>> t0.distance.get_treedist_rf(t1, normalize=True)
    >>> t0.distance.get_treedist_rf(t1, normalize=False)
    >>> t0.distance.get_treedist_rf(t1, normalize=max)    

In [31]:
from toytree.distance._src.treedist import (
    _get_n_trees_matching_two_splits,
    _get_phylo_info_two_splits,
    _get_split_phylo_info,
    _get_phylo_info,
    _get_two_splits_joint_phylo_info,
    _get_two_splits_shared_phylo_info,
)

In [74]:
print(t0._get_bipartitions_table(exclude_singleton_splits=True))
print(t1._get_bipartitions_table(exclude_singleton_splits=True))

[[1 1 0 0 0 0 0 0 0]
 [0 0 0 1 1 0 0 0 0]
 [0 0 1 1 1 0 0 0 0]
 [0 0 0 0 0 0 0 1 1]
 [0 0 0 0 0 0 1 1 1]
 [0 0 0 0 0 1 1 1 1]]
[[1 1 0 0 0 0 0 0 0]
 [0 0 0 0 1 1 0 0 0]
 [0 0 1 1 1 1 0 0 0]
 [0 0 0 0 0 0 0 1 1]
 [0 0 0 0 0 0 1 1 1]]


In [75]:
from itertools import product

set2 = set(t0._iter_bipartitions())
set1 = set(t1._iter_bipartitions())
arr = np.zeros((len(set1), len(set2)))

for i, bip1 in enumerate(set1):
    for j, bip2 in enumerate(set2):
        arr[i, j] = _get_two_splits_shared_phylo_info(bip1, bip2)

print(len(set2))
arr = pd.DataFrame(arr)
arr

6


Unnamed: 0,0,1,2,3,4,5
0,9.275349,10.123345,0.241008,7.400879,9.275349,7.400879
1,11.149818,3.252981,0.530515,0.530515,1.182591,9.275349
2,2.115477,1.378512,0.241008,0.241008,0.530515,7.400879
3,11.997815,12.845811,0.893085,10.123345,11.997815,10.123345
4,0.530515,0.893085,3.70044,0.241008,0.530515,0.241008


In [60]:
from scipy.optimize import linear_sum_assignment

In [61]:
linear_sum_assignment(arr)

(array([0, 1, 2, 3, 4]), array([5, 3, 4, 2, 0]))

In [117]:
_get_n_trees_matching_two_splits(8, 3, 4)

45

In [123]:
_get_phylo_info_two_splits(8, 3, 4)

7.851749041416057

In [86]:
>>> split1 = ((0, 1, 2, 3), (4, 5, 6, 7))
>>> split2 = ((0, 1, 2), (3, 4, 5, 6, 7))
>>> _get_two_splits_shared_phylo_info(split2, split2)

5.0443941193584525

In [99]:
>>> split1 = ((0, 1, 2, 3), (4, 5, 6, 7))
>>> split2 = ((0, 1, 2), (3, 4, 5, 6, 7))
>>> _get_two_splits_joint_phylo_info(split2, split1)

7.851749041416057

In [148]:
%%timeit
t0.get_bipartitions()

299 µs ± 4.45 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [149]:
%%timeit
list(t0._iter_bipartitions())

135 µs ± 244 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [157]:
df = t0.get_bipartitions()

## TODO

In [190]:
# toyplot 1.0
# toytree.color is simplified toyplot.color
# t0.distance.get_treedist_rfg(t1)
# t0.draw('s', edge_colors=t0.style.edge_colors);
# logger.warning on bad entry
# 

In [199]:
t0.distance.get_treedist_rf(t1, t2, t3, metric=t0.distance.get_treedist_rf)

NameError: name 't2' is not defined

In [202]:
normalize = False

match normalize:
    case _:
        print("no")

SyntaxError: invalid syntax (2843030508.py, line 3)

In [184]:
t0.draw()

(<toyplot.canvas.Canvas at 0x7fed01ec7d90>,
 <toyplot.coordinates.Cartesian at 0x7fed013a5c10>,
 <toytree.core.drawing.toytree_mark.ToytreeMark at 0x7fed01e880a0>)

In [164]:
t0.get_bipartitions(exclude_singleton_splits=False)

Unnamed: 0,0,1
0,"(A,)","(B, C, D, E, F, G, H, I)"
1,"(B,)","(A, C, D, E, F, G, H, I)"
2,"(C,)","(A, B, D, E, F, G, H, I)"
3,"(D,)","(A, B, C, E, F, G, H, I)"
4,"(E,)","(A, B, C, D, F, G, H, I)"
5,"(F,)","(A, B, C, D, E, G, H, I)"
6,"(G,)","(A, B, C, D, E, F, H, I)"
7,"(H,)","(A, B, C, D, E, F, G, I)"
8,"(I,)","(A, B, C, D, E, F, G, H)"
9,"(A, B)","(C, D, E, F, G, H, I)"


In [129]:
t0.draw('s');

In [62]:
t0.get_bipartitions()

Unnamed: 0,0,1
0,"(A, B)","(C, D, E, F, G, H, I)"
1,"(D, E)","(A, B, C, F, G, H, I)"
2,"(C, D, E)","(A, B, F, G, H, I)"
3,"(H, I)","(A, B, C, D, E, F, G)"
4,"(G, H, I)","(A, B, C, D, E, F)"
5,"(F, G, H, I)","(A, B, C, D, E)"


In [63]:
t1.get_bipartitions()

Unnamed: 0,0,1
0,"(A, B)","(C, D, E, F, G, H, I)"
1,"(E, I)","(A, B, C, D, F, G, H)"
2,"(C, D, E, I)","(A, B, F, G, H)"
3,"(G, H)","(A, B, C, D, E, F, I)"
4,"(F, G, H)","(A, B, C, D, E, I)"


In [25]:
t0.draw();

In [26]:
t1.draw();

In [54]:
set1 = list(t0._iter_bipartitions())[0]
set2 = list(t1._iter_bipartitions())[1]

set1, set2


((('A', 'B'), ('C', 'D', 'E', 'F', 'G', 'H', 'I')),
 (('E', 'I'), ('A', 'B', 'C', 'D', 'F', 'G', 'H')))

In [22]:
_get_split_phylo_info(((1, 2, 3), (4, 5)))

2.321928094887362

In [4]:
treesMatchingBoth = _get_trees_matching_two_splits(6, 3, 2) # 3
combinedInformation = _get_phylo_info_two_splits(6, 3, 2)
combinedInformation


5.129283016944966

In [2]:
# ecolor = (t
#     .set_node_data('ecolor', {i: 'red' for i in (8, 10, 13)}, default="black")
#     .get_node_data('ecolor')
# )

In [7]:
from scipy.special import factorial2

In [8]:
def _split_phylogenetic_probability(split) -> float:
    """Return the phylogenetic probability of a split sensu Martin Smith 2020.

    This is the probability that a randomly sampled binary tree of 
    size X contains the split S. It is used in rf-info.
    """
    size_a = len(split[0])
    size_b = len(split[1])
    size_x = size_a + size_b
    return (
        factorial2(2 * size_a - 3)
        * factorial2(2 * size_b - 3)
        / factorial2(2 * size_x - 5)
    )

In [10]:
t0 = toytree.tree('((A, B), ((C, (D, E)), (F, (G, (H, I)))));')
t1 = toytree.tree('((A, B), ((C, D, (E, I)), (F, (G, H))));')

In [10]:
##           [,1]      [,2]      [,3]      [,4]      [,5]
## [1,] 3.7004397 0.8930848 0.2410081 0.5305147 0.2410081
## [2,] 0.5305147 3.2529807 0.0000000 1.1825914 0.5305147
## [3,] 0.2410081 1.3785116 0.0000000 0.5305147 0.2410081
## [4,] 0.8930848 0.0000000 0.0000000 3.2529807 1.3785116
## [5,] 0.5305147 0.0000000 0.0000000 0.0000000 2.1154772
## [6,] 0.2410081 0.0000000 0.0000000 0.0000000 0.0000000

In [12]:
# number of trees
int(factorial2(2 * 6 - 5))

105

In [None]:
factorial2(2 * size_a - 3) * factorial2(2 * size_b - 3)


In [13]:
t0

NameError: name 't0' is not defined

In [16]:
t0.get_bipartitions()

Unnamed: 0,0,1
0,"(A, B)","(C, D, E, F, G, H, I)"
1,"(D, E)","(A, B, C, F, G, H, I)"
2,"(C, D, E)","(A, B, F, G, H, I)"
3,"(H, I)","(A, B, C, D, E, F, G)"
4,"(G, H, I)","(A, B, C, D, E, F)"
5,"(F, G, H, I)","(A, B, C, D, E)"


In [17]:
t1.get_bipartitions()

Unnamed: 0,0,1
0,"(A, B)","(C, D, E, F, G, H, I)"
1,"(E, I)","(A, B, C, D, F, G, H)"
2,"(C, D, E, I)","(A, B, F, G, H)"
3,"(G, H)","(A, B, C, D, E, F, I)"
4,"(F, G, H)","(A, B, C, D, E, I)"


In [127]:
arr = np.zeros((5, 5))
arr[0, 0] = 

arr

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [107]:
tree1.write(dist_formatter=None, internal_labels=None)

'((((r0,r1),(r2,r3)),(r4,r5)),(r6,r7));'

In [98]:
with tempfile.NamedTemporaryFile(mode='w+', encoding="utf-8") as out:
    out.write("hi")
    print(out.name)
    out.flush()
    with open(out.name) as i:
        print(i.read())
        

/tmp/tmp060vz529
hi


In [70]:
-np.log2(3 + 3)

-2.584962500721156

In [71]:
-np.log2(3) + -np.log2(3)

-3.169925001442312

In [48]:
biparts = list(t._iter_bipartitions())
biparts

[(('r0',), ('r1', 'r2', 'r3', 'r4', 'r5', 'r6', 'r7')),
 (('r1',), ('r0', 'r2', 'r3', 'r4', 'r5', 'r6', 'r7')),
 (('r2',), ('r0', 'r1', 'r3', 'r4', 'r5', 'r6', 'r7')),
 (('r3',), ('r0', 'r1', 'r2', 'r4', 'r5', 'r6', 'r7')),
 (('r4',), ('r0', 'r1', 'r2', 'r3', 'r5', 'r6', 'r7')),
 (('r5',), ('r0', 'r1', 'r2', 'r3', 'r4', 'r6', 'r7')),
 (('r6',), ('r0', 'r1', 'r2', 'r3', 'r4', 'r5', 'r7')),
 (('r7',), ('r0', 'r1', 'r2', 'r3', 'r4', 'r5', 'r6')),
 (('r0', 'r1'), ('r2', 'r3', 'r4', 'r5', 'r6', 'r7')),
 (('r2', 'r3'), ('r0', 'r1', 'r4', 'r5', 'r6', 'r7')),
 (('r0', 'r1', 'r2', 'r3'), ('r4', 'r5', 'r6', 'r7')),
 (('r4', 'r5'), ('r0', 'r1', 'r2', 'r3', 'r6', 'r7')),
 (('r6', 'r7'), ('r0', 'r1', 'r2', 'r3', 'r4', 'r5'))]

In [49]:
t.draw('s', edge_colors=ecolor);

In [16]:
t.get_bipartitions()

Unnamed: 0,0,1
0,"(r0,)","(r1, r2, r3, r4, r5, r6, r7)"
1,"(r1,)","(r0, r2, r3, r4, r5, r6, r7)"
2,"(r2,)","(r0, r1, r3, r4, r5, r6, r7)"
3,"(r3,)","(r0, r1, r2, r4, r5, r6, r7)"
4,"(r4,)","(r0, r1, r2, r3, r5, r6, r7)"
5,"(r5,)","(r0, r1, r2, r3, r4, r6, r7)"
6,"(r6,)","(r0, r1, r2, r3, r4, r5, r7)"
7,"(r7,)","(r0, r1, r2, r3, r4, r5, r6)"
8,"(r0, r1)","(r2, r3, r4, r5, r6, r7)"
9,"(r2, r3)","(r0, r1, r4, r5, r6, r7)"


In [14]:
toytree.mtree([toytree.rtree.unittree(10) for i in range(5)]).get_consensus_tree().draw('s', node_labels="support");

In [5]:
t = toytree.rtree.baltree(8)
t.draw('s', node_labels='name');

In [102]:
# two trees
tree1 = toytree.rtree.unittree(8, seed=123)
tree2 = toytree.rtree.unittree(8, seed=293)

# place trees in a list
trees_list = [tree1, tree2]

# display trees with multitree
TREES = toytree.core.multitree.MultiTree(trees_list)
TREES.draw(ts='p');

### Workflow

1. Input: toytrees
2. User options:
    - Type of distance metric: RF vs. quartets
    - `sampmethod` = Consensus vs. pairwise vs. random 
    - `consensustree` = set as None by default (generate consensus tree for user) OR user provides consensus tree 
4. Output: pandas dataframe with tree #s compared and respective distance metric calculation

In [3]:
# goal

# workflow goal
#def robinson_foulds(tree1, tree2, *args):
   
#    tool = RobinsonFoulds(tree1, tree2, *args)
#    tool.run()
#    return tool.data

In [4]:
# two trees
tree1 = toytree.rtree.unittree(5, seed=123)
tree2 = toytree.rtree.unittree(5, seed=293)

# place trees in a list
trees_list = [tree1, tree2]

# display trees with multitree
TREES = toytree.core.multitree.MultiTree(trees_list)
TREES.draw(ts='p');

In [5]:
import itertools

In [6]:
tree1.draw('s')

(<toyplot.canvas.Canvas at 0x7f524f3fb8e0>,
 <toyplot.coordinates.Cartesian at 0x7f524f3fbb50>,
 <toytree.core.drawing.toytree_mark.ToytreeMark at 0x7f524f403220>)

In [7]:
#tree1.mod.rotate_node('r0', 'r3').draw('s');

In [8]:
a = {1, 2, 3}
b = {2, 3, 4}

In [9]:
a & b

{2, 3}

In [24]:
tree1 = toytree.tree('((A, B), ((C, (D, E)), (F, (G, (H, I)))));')
tree2 = toytree.tree('((A, B), ((C, D, (E, I)), (F, (G, H))));')

In [42]:
#%%timeit
sum(1 for i in tree1._iter_bipartitions() if len(i[0]) > 1)

6

In [46]:
tree1.unroot().get_bipartitions()

Unnamed: 0,0,1
0,"(A,)","(B, C, D, E, F, G, H, I)"
1,"(B,)","(A, C, D, E, F, G, H, I)"
2,"(C,)","(A, B, D, E, F, G, H, I)"
3,"(D,)","(A, B, C, E, F, G, H, I)"
4,"(E,)","(A, B, C, D, F, G, H, I)"
5,"(F,)","(A, B, C, D, E, G, H, I)"
6,"(G,)","(A, B, C, D, E, F, H, I)"
7,"(H,)","(A, B, C, D, E, F, G, I)"
8,"(I,)","(A, B, C, D, E, F, G, H)"
9,"(D, E)","(A, B, C, F, G, H, I)"


In [88]:
toytree.rtree.bdtree(8, b=0.5, d=0.5).draw();

⚠️ toytree: cannot delete root Node.


In [54]:
toytree.rtree.baltree(8).draw();

In [52]:
tree1.unroot().mod.drop_tips("G").get_tip_labels()

['A', 'B', 'C', 'D', 'E', 'F', 'H', 'I']

In [51]:
tree1.unroot().mod.drop_tips("G").draw('s');

In [31]:
tree1.get_bipartitions("idx", exclude_internal_labels)

Unnamed: 0,0,1
0,"(0,)","(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14..."
1,"(1,)","(0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14..."
2,"(2,)","(0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14..."
3,"(3,)","(0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14..."
4,"(4,)","(0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14..."
5,"(5,)","(0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14..."
6,"(6,)","(0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14..."
7,"(7,)","(0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14..."
8,"(8,)","(0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14..."
9,"(0, 1, 9)","(2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15)"


In [28]:
a = set(tree1._iter_bipartitions()) 
b = set(tree2._iter_bipartitions())

len(a ^ b), len(a) + len(b) - (tree1.ntips) - tree1.ntips

(9, 11)

In [15]:
toytree.mtree([tree1, tree2]).draw();

In [23]:
tree1._get_bipartitions_table()

array([[1, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 1, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 1, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 1, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 1, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 1, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 1, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 1, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 1],
       [1, 1, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 1, 1, 0, 0, 0, 0],
       [0, 0, 1, 1, 1, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 1, 1],
       [0, 0, 0, 0, 0, 0, 1, 1, 1],
       [0, 0, 0, 0, 0, 1, 1, 1, 1]])

In [22]:
set(tree1._iter_bipartitions()) ^ set(tree2._iter_bipartitions())

{(('C', 'D', 'E'), ('A', 'B', 'F', 'G', 'H', 'I')),
 (('C', 'D', 'E', 'I'), ('A', 'B', 'F', 'G', 'H')),
 (('D', 'E'), ('A', 'B', 'C', 'F', 'G', 'H', 'I')),
 (('E', 'I'), ('A', 'B', 'C', 'D', 'F', 'G', 'H')),
 (('F', 'G', 'H'), ('A', 'B', 'C', 'D', 'E', 'I')),
 (('F', 'G', 'H', 'I'), ('A', 'B', 'C', 'D', 'E')),
 (('G', 'H'), ('A', 'B', 'C', 'D', 'E', 'F', 'I')),
 (('G', 'H', 'I'), ('A', 'B', 'C', 'D', 'E', 'F')),
 (('H', 'I'), ('A', 'B', 'C', 'D', 'E', 'F', 'G'))}

In [65]:
list(tree2._iter_bipartitions())

[(('A',), ('B', 'C', 'D', 'E', 'I', 'F', 'G', 'H')),
 (('B',), ('A', 'C', 'D', 'E', 'I', 'F', 'G', 'H')),
 (('C',), ('A', 'B', 'D', 'E', 'I', 'F', 'G', 'H')),
 (('D',), ('A', 'B', 'C', 'E', 'I', 'F', 'G', 'H')),
 (('E',), ('A', 'B', 'C', 'D', 'I', 'F', 'G', 'H')),
 (('I',), ('A', 'B', 'C', 'D', 'E', 'F', 'G', 'H')),
 (('F',), ('A', 'B', 'C', 'D', 'E', 'I', 'G', 'H')),
 (('G',), ('A', 'B', 'C', 'D', 'E', 'I', 'F', 'H')),
 (('H',), ('A', 'B', 'C', 'D', 'E', 'I', 'F', 'G')),
 (('A', 'B'), ('C', 'D', 'E', 'I', 'F', 'G', 'H')),
 (('E', 'I'), ('A', 'B', 'C', 'D', 'F', 'G', 'H')),
 (('C', 'D', 'E', 'I'), ('A', 'B', 'F', 'G', 'H')),
 (('H', 'G'), ('A', 'B', 'C', 'D', 'E', 'I', 'F')),
 (('F', 'G', 'H'), ('A', 'B', 'C', 'D', 'E', 'I'))]

In [24]:
tree1.mod.drop_tips("r4")

TypeError: unsupported operand type(s) for +=: 'set' and 'list'

In [22]:
get_quartets_new(tree1)

⚠️ toytree: No tips selected. Matched query: set()


{'r4'}


TypeError: unsupported operand type(s) for +=: 'set' and 'list'

In [17]:
import itertools
random_tree_dist()

{'mean': 0.3137566137566138, 'std': 0.08184827769461263}

In [None]:
get_pairwise_dist(i, j)

In [6]:


def random_tree_dist(ntips=10, ntrees=10):
    rtrees = [toytree.rtree.unittree(ntips, random_names=True) for i in range(ntrees)]
    dists = []
    for treepair in itertools.combinations(rtrees, 2):
        q0 = get_quartets(treepair[0])
        q1 = get_quartets(treepair[1])
        dist = len(q0.intersection(q1)) / len(q0)
        dists.append(dist)
    return {"mean": np.mean(dists), "std": np.std(dists)}



In [7]:
from toytree.distance._src.treedist import (
    get_pairwise_dist, get_distance_matrix)

In [8]:
get_pairwise_dist(*trees_list, 'q')

0.0

In [9]:
get_distance_matrix(trees_list, method='q')

array([[0., 0.],
       [0., 0.]])

### Quartets
Located as `quartets` class object within `treedist.py`

In [25]:
quart = toytree.distance.treedist.quartets(trees_list, "pairwise")
quart.run()
quart.data

AttributeError: module 'toytree.distance' has no attribute 'treedist'

In [6]:
quart = toytree.distance.treedist.quartets(trees_list, "random")
quart.run()
quart.data

Unnamed: 0,trees,Quartet_intersection
0,"1, 0",0.6


In [7]:
quart = toytree.distance.treedist.quartets(trees_list, "consensus")
quart.run()
quart.data

Unnamed: 0,trees,Quartet_intersection
0,"0, consensus",1.0
1,"1, consensus",0.6


### RF
Located as `robinson_foulds` class object within `treedist.py`

In [5]:
rf = toytree.distance.treedist.robinson_foulds(trees_list, "pairwise")
rf.run()
rf.data


      /-r4
   /-|
  |  |   /-r3
  |   \-|
--|      \-r2
  |
  |   /-r1
   \-|
      \-r0
partition [0. 0. 0. 0. 1.]
partition_inverse [1. 1. 1. 1. 0.]
partition [0. 0. 0. 1. 1.]
partition_inverse [1. 1. 1. 0. 0.]
partition [0. 0. 1. 1. 1.]
partition_inverse [1. 1. 0. 0. 0.]
partition [0. 1. 1. 1. 1.]
partition_inverse [1. 0. 0. 0. 0.]
partition [1. 1. 1. 1. 1.]
partition_inverse [0. 0. 0. 0. 0.]
partition skip [1. 1. 1. 1. 1.]

   /-r4
--|
  |   /-r3
   \-|
      \-r2
partition [0. 0. 0. 0. 1.]
partition_inverse [1. 1. 1. 1. 0.]
partition [0. 0. 0. 1. 1.]
partition_inverse [1. 1. 1. 0. 0.]
partition [0. 0. 1. 1. 1.]
partition_inverse [1. 1. 0. 0. 0.]
{(0.0, 0.0, 1.0, 1.0, 1.0)}

--r4
partition [0. 0. 0. 0. 1.]
partition_inverse [1. 1. 1. 1. 0.]
partition skip [0. 0. 0. 0. 1.]

   /-r3
--|
   \-r2
partition [0. 0. 0. 1. 0.]
partition_inverse [1. 1. 1. 0. 1.]
partition [0. 0. 1. 1. 0.]
partition_inverse [1. 1. 0. 0. 1.]
{(0.0, 0.0, 1.0, 1.0, 0.0), (0.0, 0.0, 1.0, 1.0, 1.0)}

--r3
partit

Unnamed: 0,trees,RF,max_RF,normalized_rf
0,"0, 1",4,6.0,0.666667


In [6]:
rf = toytree.distance.treedist.robinson_foulds(trees_list, "random")
rf.run()
rf.data


      /-r4
   /-|
  |  |   /-r3
  |   \-|
--|      \-r2
  |
  |   /-r1
   \-|
      \-r0
partition [0. 0. 0. 0. 1.]
partition_inverse [1. 1. 1. 1. 0.]
partition [0. 0. 0. 1. 1.]
partition_inverse [1. 1. 1. 0. 0.]
partition [0. 0. 1. 1. 1.]
partition_inverse [1. 1. 0. 0. 0.]
partition [0. 1. 1. 1. 1.]
partition_inverse [1. 0. 0. 0. 0.]
partition [1. 1. 1. 1. 1.]
partition_inverse [0. 0. 0. 0. 0.]
partition skip [1. 1. 1. 1. 1.]

   /-r4
--|
  |   /-r3
   \-|
      \-r2
partition [0. 0. 0. 0. 1.]
partition_inverse [1. 1. 1. 1. 0.]
partition [0. 0. 0. 1. 1.]
partition_inverse [1. 1. 1. 0. 0.]
partition [0. 0. 1. 1. 1.]
partition_inverse [1. 1. 0. 0. 0.]
{(0.0, 0.0, 1.0, 1.0, 1.0)}

--r4
partition [0. 0. 0. 0. 1.]
partition_inverse [1. 1. 1. 1. 0.]
partition skip [0. 0. 0. 0. 1.]

   /-r3
--|
   \-r2
partition [0. 0. 0. 1. 0.]
partition_inverse [1. 1. 1. 0. 1.]
partition [0. 0. 1. 1. 0.]
partition_inverse [1. 1. 0. 0. 1.]
{(0.0, 0.0, 1.0, 1.0, 0.0), (0.0, 0.0, 1.0, 1.0, 1.0)}

--r3
partit

Unnamed: 0,trees,RF,max_RF,normalized_rf
0,"1, 0",4,6.0,0.666667


In [7]:
rf = toytree.distance.treedist.robinson_foulds(trees_list, "consensus")
rf.run()
rf.data


      /-r4
   /-|
  |  |   /-r3
  |   \-|
--|      \-r2
  |
  |   /-r1
   \-|
      \-r0
partition [0. 0. 0. 0. 1.]
partition_inverse [1. 1. 1. 1. 0.]
partition [0. 0. 0. 1. 1.]
partition_inverse [1. 1. 1. 0. 0.]
partition [0. 0. 1. 1. 1.]
partition_inverse [1. 1. 0. 0. 0.]
partition [0. 1. 1. 1. 1.]
partition_inverse [1. 0. 0. 0. 0.]
partition [1. 1. 1. 1. 1.]
partition_inverse [0. 0. 0. 0. 0.]
partition skip [1. 1. 1. 1. 1.]

   /-r4
--|
  |   /-r3
   \-|
      \-r2
partition [0. 0. 0. 0. 1.]
partition_inverse [1. 1. 1. 1. 0.]
partition [0. 0. 0. 1. 1.]
partition_inverse [1. 1. 1. 0. 0.]
partition [0. 0. 1. 1. 1.]
partition_inverse [1. 1. 0. 0. 0.]
{(0.0, 0.0, 1.0, 1.0, 1.0)}

--r4
partition [0. 0. 0. 0. 1.]
partition_inverse [1. 1. 1. 1. 0.]
partition skip [0. 0. 0. 0. 1.]

   /-r3
--|
   \-r2
partition [0. 0. 0. 1. 0.]
partition_inverse [1. 1. 1. 0. 1.]
partition [0. 0. 1. 1. 0.]
partition_inverse [1. 1. 0. 0. 1.]
{(0.0, 0.0, 1.0, 1.0, 0.0), (0.0, 0.0, 1.0, 1.0, 1.0)}

--r3
partit

Unnamed: 0,trees,RF,max_RF,normalized_rf
0,"0, consensus",5,5.0,1.0
1,"1, consensus",3,5.0,0.6


## ---

### TO DO:
1. Quartets function - DONE
2. Rewrite RF function - DONE
3. Suggested tweaks: 
  - Raise error when only one tree given
  - Use `args` instead of sampmethod, consensustree etc.
  - access to backup OldRobinsonFoulds option (?)
4. Python class inheritance (decorator @ with class object)