# Conditional clade probability summaries

## Reduce the primate data to four taxa

In [209]:
from Bio.Nexus.Nexus import Nexus

# Read the primate mtDNA NEXUS file into a fresh Nexus object.
nexus = Nexus()
nexus.read('../data/primate-mtDNA.nex')
# Bare expression so the notebook echoes the object.
nexus

<Bio.Nexus.Nexus.Nexus at 0x7f8ec5f98cc0>

In [199]:
# Show the taxon labels of the loaded data set.
nexus.taxlabels

['Lemur_catta',
 'Homo_sapiens',
 'Pan',
 'Gorilla',
 'Pongo',
 'Hylobates',
 'Macaca_fuscata',
 'M._mulatta',
 'M._fascicularis',
 'M._sylvanus',
 'Saimiri_sciureus',
 'Tarsius_syrichta']

In [205]:
# Delete every taxon after the first four labels and store the cropped
# matrix in place of the original one.
nexus.matrix = nexus.crop_matrix(delete=nexus.taxlabels[4:])

In [210]:
# Write the data to a new NEXUS file, leaving out every taxon after the
# first four labels.
nexus.write_nexus_data('../data/primate-mtDNA-short.nex', delete=nexus.taxlabels[4:])

'../data/primate-mtDNA-short.nex'

In [194]:
from Bio import SeqIO

# Parse the same NEXUS file with SeqIO and materialise the records in a list.
seqs = list(SeqIO.parse('../data/primate-mtDNA.nex', 'nexus'))
# Bare expression so the notebook echoes the parsed records.
seqs

[SeqRecord(seq=Seq('AAGCTTCATAGGAGCAACCATTCTAATAATCGCACATGGCCTTACATCATCCAT...CTT', IUPACAmbiguousDNA()), id='Lemur_catta', name='Lemur_catta', description='', dbxrefs=[]),
 SeqRecord(seq=Seq('AAGCTTCACCGGCGCAGTCATTCTCATAATCGCCCACGGGCTTACATCCTCATT...CTT', IUPACAmbiguousDNA()), id='Homo_sapiens', name='Homo_sapiens', description='', dbxrefs=[]),
 SeqRecord(seq=Seq('AAGCTTCACCGGCGCAATTATCCTCATAATCGCCCACGGACTTACATCCTCATT...CTT', IUPACAmbiguousDNA()), id='Pan', name='Pan', description='', dbxrefs=[]),
 SeqRecord(seq=Seq('AAGCTTCACCGGCGCAGTTGTTCTTATAATTGCCCACGGACTTACATCATCATT...CTT', IUPACAmbiguousDNA()), id='Gorilla', name='Gorilla', description='', dbxrefs=[]),
 SeqRecord(seq=Seq('AAGCTTCACCGGCGCAACCACCCTCATGATTGCCCATGGACTCACATCCTCCCT...CTT', IUPACAmbiguousDNA()), id='Pongo', name='Pongo', description='', dbxrefs=[]),
 SeqRecord(seq=Seq('AAGCTTTACAGGTGCAACCGTCCTCATAATCGCCCACGGACTAACCTCTTCCCT...CTT', IUPACAmbiguousDNA()), id='Hylobates', name='Hylobates', description='', dbxrefs=[]),
 SeqRe

In [142]:
# List the attributes available on the first parsed record.
dir(seqs[0])

['__add__',
 '__bool__',
 '__class__',
 '__contains__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__le___',
 '__len__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__nonzero__',
 '__radd__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_per_letter_annotations',
 '_seq',
 '_set_per_letter_annotations',
 '_set_seq',
 'annotations',
 'dbxrefs',
 'description',
 'features',
 'format',
 'id',
 'letter_annotations',
 'lower',
 'name',
 'reverse_complement',
 'seq',
 'translate',
 'upper']

## Data structures for conditional clades

In [46]:
class Split:
    """A bipartition of a parent clade into two non-empty child clades.

    The children are kept in a canonical order: ``_child1`` is always the
    child that contains the smallest element of the parent. A split built
    from either of its two children therefore compares equal to, and hashes
    the same as, the split built from the other child.
    """

    def __init__(self, parent, child):
        """Create the split of ``parent`` into ``child`` and its complement.

        Args:
            parent: Set of at least three mutually orderable elements.
            child: Non-empty proper subset of ``parent``; must provide
                ``issubset``.

        Raises:
            AssertionError: If any of the size or subset conditions fails.
        """
        assert len(parent) > len(child)
        assert len(child) > 0
        assert len(parent) > 2
        assert child.issubset(parent)
        self.parent = parent
        other_child = parent - child
        child_sorted = sorted(child)
        other_sorted = sorted(other_child)
        # Canonicalise: the child holding the smallest element comes first,
        # so equality and hashing do not depend on which child was passed in.
        if child_sorted[0] < other_sorted[0]:
            first, second = (child, child_sorted), (other_child, other_sorted)
        else:
            first, second = (other_child, other_sorted), (child, child_sorted)
        self._child1 = frozenset(first[0])
        self._child1_sorted = first[1]
        self._child2 = frozenset(second[0])
        self._child2_sorted = second[1]

    def get_children(self):
        """Return the two children as a tuple of frozensets in canonical order."""
        return self._child1, self._child2

    def get_parent(self):
        """Return the parent clade as the frozenset union of both children."""
        return frozenset.union(self._child1, self._child2)

    def __str__(self):
        return str(self._child1_sorted) + ' | ' + str(self._child2_sorted)

    def __eq__(self, other):
        # Defer to the other operand for foreign types instead of failing
        # with AttributeError; Python then falls back to "not equal".
        if not isinstance(other, Split):
            return NotImplemented
        return self.get_children() == other.get_children()

    def __repr__(self):
        return 'Split({0}, {1})'.format(set(self.get_parent()), set(self.get_children()[0]))

    def __hash__(self):
        # Hash the same tuple that __eq__ compares, keeping the two consistent.
        return hash(self.get_children())
    
# Build a sample split so the notebook echoes its repr.
Split({1, 2, 3}, {1})

Split({1, 2, 3}, {1})

In [86]:
from itertools import combinations

def get_splits(clade):
    """Lazily yield a Split for every bipartition of ``clade``.

    The smallest element of the clade is always placed in the child handed
    to ``Split``; the remaining elements are added in every combination that
    still leaves at least one element for the other child. Each bipartition
    is therefore produced exactly once.

    Args:
        clade: Sized collection of mutually orderable elements.

    Yields:
        One ``Split`` per bipartition, ordered by the size of the child that
        holds the smallest element.
    """
    size = len(clade)
    ordered = sorted(clade)
    anchor, remainder = ordered[:1], ordered[1:]
    # extra_count stops at size - 2 so the complementary child is never empty.
    for extra_count in range(size - 1):
        for extras in combinations(remainder, extra_count):
            yield Split(clade, set(anchor).union(extras))
        
# List every split of the four-element clade {0, 1, 2, 3} in string form.
[str(split) for split in get_splits(set(range(4)))]

['[0] | [1, 2, 3]',
 '[0, 1] | [2, 3]',
 '[0, 2] | [1, 3]',
 '[0, 3] | [1, 2]',
 '[0, 1, 2] | [3]',
 '[0, 1, 3] | [2]',
 '[0, 2, 3] | [1]']

In [75]:
import autograd
import autograd.numpy as anp

def f(params, nested_indices):
    """Toy function over a nested parameter dict.

    Computes ``params['a']**2 * cos(params['b'][i]) - params['b'][j]`` where
    ``i`` and ``j`` are the first two entries of ``nested_indices``.

    Args:
        params: Mapping with a numeric entry ``'a'`` and a nested mapping
            ``'b'``.
        nested_indices: Sequence whose first two items are keys into
            ``params['b']``.

    Returns:
        The value of the expression above.
    """
    scale = params['a'] ** 2
    inner = params['b']
    cos_term = anp.cos(inner[nested_indices[0]])
    return scale * cos_term - inner[nested_indices[1]]

# Evaluate f at a sample point: params['b']['b'] goes inside the cosine and
# params['b']['a'] is subtracted.
f({ 'a': 3.0, 'b': { 'a': -1.0 , 'b': 2.5 } }, ['b', 'a'])

-6.210292539922404

In [76]:
# Gradient of f at the same sample point; the echoed result mirrors the
# nested dict structure of the params argument.
autograd.grad(f)({ 'a': 3.0, 'b': { 'a': -1.0 , 'b': 2.5 } }, ['b', 'a'])

{'a': array(-4.80686169), 'b': {'a': array(-1.), 'b': array(-5.3862493)}}