### toytree quartet functions (in progress)

In [1]:
import toytree
import itertools
import numpy as np

### get two random trees

In [2]:
t0 = toytree.rtree.unittree(10, seed=0)
t1 = toytree.rtree.unittree(10, seed=1)

In [3]:
toytree.mtree([t0, t1]).draw(ts='p', height=200);

### Quartet functions

In [4]:
def get_quartets(ttre):
    
    # store all quartets in this SET
    qset = set([])
    
    # get a SET with all tips in the tree
    fullset = set(ttre.get_tip_labels())
    
    # get a SET of the descendants from each internal node
    for node in ttre.idx_dict.values():   

        # skip leaf nodes
        if not node.is_leaf():
            
            children = set(node.get_leaf_names())
            prod = itertools.product(
                itertools.combinations(children, 2),
                itertools.combinations(fullset - children, 2),
            )
            quartets = set([tuple(itertools.chain(*i)) for i in prod])
            qset = qset.union(quartets)

    # order tups in sets
    sorted_set = set()
    for qs in qset:
        if np.argmin(qs) > 1:
            tup = tuple(sorted(qs[2:]) + sorted(qs[:2]))
            sorted_set.add(tup)
        else:
            tup = tuple(sorted(qs[:2]) + sorted(qs[2:]))
            sorted_set.add(tup)            
    
    return sorted_set

In [5]:
def get_quartets_new(ttre):
    # store all quartets in this list
    qlist = list()
    
    # get names
    tnames = ttre.get_tip_labels()
    
    # for each combination of four names...
    for qnames in itertools.combinations(tnames,4):
        # drop all tips not in our four chosen
        qtre = ttre.drop_tips(set(tnames).difference(qnames))
        
        # ladderize to order tips around split
        ltre = qtre.ladderize()
        
        # get names ordered around split (first two on one side, second two on other side)
        snames = ltre.get_tip_labels()
        
        # append to quartet list
        qlist.append(tuple(snames))
    return(set(tuple(qlist)))

In [6]:
%%timeit
get_quartets(t0)

2.06 ms ± 11.4 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [7]:
%%timeit
get_quartets_new(t0)

305 ms ± 10.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### Compare quartet sets method 1

In [8]:
q0 = get_quartets(t0)
q1 = get_quartets(t1)

In [9]:
# quartets that are in one tree but not the other
diffs = q0.symmetric_difference(q1)
len(diffs)

24

In [10]:
len(q0.intersection(q1)) / len(q0)

0.9428571428571428

### Compare quartets method 2

In [11]:
q0 = get_quartets_new(t0)
q1 = get_quartets_new(t1)

In [12]:
# quartets that are in one tree but not the other
diffs = q0.symmetric_difference(q1)
len(diffs)

68

In [13]:
len(q0.intersection(q1)) / len(q0)

0.8380952380952381

### Expectation that 1/3 will match on random trees

In [14]:
def random_tree_dist(ntips=10, ntrees=10):
    rtrees = [toytree.rtree.unittree(ntips, random_names=True) for i in range(ntrees)]
    dists = []
    for treepair in itertools.combinations(rtrees, 2):
        q0 = get_quartets(treepair[0])
        q1 = get_quartets(treepair[1])
        dist = len(q0.intersection(q1)) / len(q0)
        dists.append(dist)
    return {"mean": np.mean(dists), "std": np.std(dists)}

In [15]:
# looks pretty close to the expectation
random_tree_dist(20, 10)

{'mean': 0.33851163857355804, 'std': 0.034079189736534586}