In [2]:
import dendropy as dp
import numpy as np
import re, sys, os
import pandas as pd

In [3]:
target_type = 'type1'
tree = dp.Tree.get_from_path('set_test_template_r0_nMig12_migAge1.1_splitAge1.33_prunedLowSamp.nexus.tree', 'nexus')

In [4]:
for i in tree.leaf_node_iter():
    i.taxon.label = re.sub('^_|^ |_$| $', '', i.taxon.label)

def check_monophyly(node, target_type):
    tip_types = []
    tips = []
    for i in node.leaf_iter():
        tips.append(i)
        tip_types.append(re.split('_| ', i.taxon.label)[1])
    return( [all([j == target_type for j in tip_types]), tips] )

monophyletic_nodes = []
all_imports = []
visited_tips = []
visited_nodes = []
for tip in tree.leaf_node_iter():
    if tip in visited_tips:
        continue
    if(re.split('_| ', tip.taxon.label)[1] == target_type):
        monophyletic_ancestors = []
        for ancestor in tip.ancestor_iter():
            if ancestor in visited_nodes:
                continue
            is_monophyletic = check_monophyly(ancestor, target_type) 
            if is_monophyletic[0]:
                monophyletic_ancestors.append([ancestor, ancestor.distance_from_root()])
                for child_tip in is_monophyletic[1]:
                    visited_tips.append([child_tip, child_tip.distance_from_root()])
                visited_nodes.append(ancestor)
            else:
                all_imports.append([ancestor, ancestor.distance_from_root() - ancestor.edge_length])# Because we want to count singletons too!
                visited_tips.append([tip, tip.distance_from_root()])
                visited_nodes.append(ancestor)
                break
        if len(monophyletic_ancestors) > 0:
            oldest_monophyletic_ancestor = monophyletic_ancestors[-1] 
            if not (oldest_monophyletic_ancestor in monophyletic_nodes):
                monophyletic_nodes.append(oldest_monophyletic_ancestor)
        

In [5]:
visited_tips = []
importation_nodes = []
for tip in tree.leaf_node_iter():
    if(len(re.findall(target_type, tip.taxon.label)) > 0):
        if(tip in visited_tips):
            #print('I am alredy a visited tip')
            continue
        else:
            print('I am in tip'+tip.taxon.label)
            print('I have not been here before')
        #print('I have visited '+str(len(visited_tips))+' tips')
        ancestors_of_tip = []
        for ancestor in tip.ancestor_iter():
            is_monophyletic = check_monophyly(ancestor, target_type=target_type)
            ancestors_of_tip.append(ancestor)
            if(not is_monophyletic[0]):
                most_recent_decendants = check_monophyly(ancestors_of_tip[len(ancestors_of_tip)-2], target_type=target_type)[1]
                [visited_tips.append(i) for i in most_recent_decendants]
                importation_nodes.append(ancestor)
                break
        #print('Now I have visited '+str(len(visited_tips)))
        print('I have '+str(len(ancestors_of_tip))+' monophyletic ancestors')

I am in tip11_type1_1.53
I have not been here before
I have 3 monophyletic ancestors
I am in tip14_type1_1.44
I have not been here before
I have 2 monophyletic ancestors
I am in tip56_type1_1.8
I have not been here before
I have 2 monophyletic ancestors
I am in tip64_type1_1.73
I have not been here before
I have 1 monophyletic ancestors
I am in tip96_type1_1.59
I have not been here before
I have 3 monophyletic ancestors


In [6]:
importation_nodes


[<Node object at 0x7f53cae37550: 'None' (None)>,
 <Node object at 0x7f53cae37d60: 'None' (None)>,
 <Node object at 0x7f53cae46f40: 'None' (None)>,
 <Node object at 0x7f53cae4da60: 'None' (None)>,
 <Node object at 0x7f53cae4d580: 'None' (None)>]

In [7]:
import_ages = [i.distance_from_root() for i in importation_nodes] # From here find the first importation
np.min(import_ages)

0.6632636919336057

In [8]:
0.51688+0.14639

0.66327

In [10]:
tip_ages = [i.distance_from_root() for i in visited_tips]
tip_ages

[1.6284672174504489,
 1.534342301228392,
 1.5198114326537366,
 1.5573423068620187,
 1.4422462049698934,
 1.749478360498073,
 1.4390943728518435,
 1.6308756389361019,
 1.6726266404784051,
 1.7873415650769569,
 1.798978273472032,
 1.7736298410850937,
 1.7440542607122067,
 1.4696601519239942,
 1.7293351234934127,
 1.5884600455310545,
 1.3505947960724713,
 1.2448131674623413,
 1.6859892062305737]

In [91]:
age_last_sample = re.split('_', visited_tips[tip_ages.idxmax()][0].taxon.label)[2]
node_ages = [i.distance_from_root() for i in tree.postorder_internal_node_iter()]

In [97]:
float(age_last_sample) - np.min(node_ages)

1.7188529432708306

In [95]:
np.max(node_ages)

1.825924016268837

In [7]:
# Get all tip ages
tip_ages = []
for i in tree.leaf_node_iter():
    tip_ages.append([i.taxon.label, i.annotations.get_value('location'), i.distance_from_root()])

tip_ages = pd.DataFrame(tip_ages)
tip_ages.columns = ['tip_label', 'type', 'date']
tip_ages.head()

Unnamed: 0,tip_label,type,date
0,1_type0_1.35,0,1.351354
1,2_type0_1.38,0,1.376085
2,3_type0_1.3,0,1.299793
3,4_type0_1.28,0,1.283897
4,5_type0_1.37,0,1.365684


In [8]:

# Find age of first human case
location_human = 'type1' #this would be in the args
first_human_case = tip_ages.date[tip_ages.type == location_human].min()
first_human_case

nan

In [None]:


# Sample tips with high sampling probabilty after first human case
print('Sampling non human samples with probability '+str(non_human_sampling_prop_high)+' after first human case')
tips_to_remove = []
for i in range(tip_ages.shape[0]):
    if tip_ages.date[i] < first_human_case:
        tips_to_remove.append(tip_ages.tip_label[i])
        print('Prunning (opportunistic)'+tip_ages.tip_label[i])
    elif tip_ages.type[i] != location_human and tip_ages.date[i] >= first_human_case:
        if np.random.binomial(1, non_human_sampling_prop_high) == 0:
            tips_to_remove.append(tip_ages.tip_label[i])
            print('Prunning (opportunistic)'+tip_ages.tip_label[i])
            
tree_opportunistic_sampling = tree.clone(depth = 1)
tree_opportunistic_sampling.prune_taxa_with_labels(tips_to_remove)
tree_opportunistic_sampling.write_to_path(re.sub('.nexus.tree', '_prunedOppSamp.nexus.tree', tree_file_name), 'nexus')

aln_opportunistic_sampling = alignment.clone(depth = 1)
prune_alignment(aln_opportunistic_sampling, tips_to_remove).write_to_path(re.sub('.fasta', '_prunedOppSamp.fasta', aln_file_name), 'fasta')

