In [226]:
import dendropy as dp
import numpy as np
import pandas as pd

In [227]:
# Location code (stored as a string) used to select the human rows/tips in this notebook.
location_human = '1'
# NOTE(review): presumably the probability of sampling a human tip; it is not used in the visible cells — confirm.
psamp_human = 0.8

In [228]:
# Read the tree from its NEXUS file.
tree = dp.Tree.get_from_path('set1_MPXV_common_r0.nexus.tree', 'nexus')

# Suffix each tip label with that tip's 'location' annotation: "<label>_<location>".
for leaf in tree.leaf_node_iter():
    leaf_location = leaf.annotations.get_value('location')
    leaf.taxon.label = '_'.join([leaf.taxon.label, leaf_location])

In [229]:
# Get ground truth of number of imports, tmrca of first human import, tmrca of tree

In [230]:
# node_ages: distance from the root for every node, in postorder.
# migration_events: one row per node annotated with reaction 'Migration', as
# [reaction, node location, location of the node's first child, distance from root].
migration_events = []
node_ages = []
for nd in tree.postorder_node_iter():
    root_distance = nd.distance_from_root()
    node_ages.append(root_distance)

    event = nd.annotations.get_value('reaction')
    if event != 'Migration':
        continue

    parent_location = nd.annotations.get_value('location')
    first_child = nd.child_nodes()[0]
    child_location = first_child.annotations.get_value('location')
    migration_events.append([event, parent_location, child_location, root_distance])

In [231]:
# Show the collected rows: [reaction, node location, first-child location, distance from root].
migration_events

[['Migration', '0', '1', 1.5426691500513183],
 ['Migration', '0', '1', 1.3758556009501928],
 ['Migration', '0', '1', 1.5432836549704976],
 ['Migration', '0', '1', 1.5639668761508145],
 ['Migration', '0', '1', 1.683477017041051],
 ['Migration', '0', '1', 1.8174272689406739],
 ['Migration', '0', '1', 1.179726455599341],
 ['Migration', '0', '1', 1.4810564054264064],
 ['Migration', '0', '1', 1.3733153341318793],
 ['Migration', '0', '1', 1.4457324962016096],
 ['Migration', '0', '1', 1.4913357771577922],
 ['Migration', '0', '1', 1.4971891606808434],
 ['Migration', '1', '0', 1.959370772416074],
 ['Migration', '0', '1', 1.947092251346933],
 ['Migration', '1', '0', 1.354222223681294],
 ['Migration', '0', '1', 1.0730795048786974],
 ['Migration', '0', '1', 1.488952265709465],
 ['Migration', '0', '1', 1.5902757481458623]]

In [232]:
# 1. Export the tree with the data above.
# 2. Prune tips with the 'Removed' reaction and location 0. These are animals.
# 2.1 Prune tips with the 'Removed' reaction and location 1. These are humans.
# 2.2 Prune nodes that are not leaves and that have a single descendant (migrations).
# 2.3 Calculate the number of imports, the date of the first import, and the tmrca of the sampled tree.
# 2.4 Export the tree in nexus. This is the 'sampled' tree.

# 3. Take the initial tree and remove only the location 1 tips among those with the 'Removed' reaction.
# 3.1 Prune all tips with location 0 prior to the first location 1.
# 3.2 Export the tree in nexus. This is the 'opportunistically sampled' tree.


In [233]:
# Collapse internal nodes that have exactly one child (e.g. the migration
# nodes) so that the exported tree contains no unifurcations.
#
# Tree.prune_nodes() detaches every listed node from its parent, which throws
# away the whole clade below it. suppress_unifurcations() instead splices each
# outdegree-one node out of the tree and merges its branch length into the
# child's, so every descendant tip is kept.
#
# `to_prune` is still built so the nodes that get collapsed remain inspectable.
to_prune = [node for node in tree.postorder_node_iter() if len(node.child_nodes()) == 1]
tree.suppress_unifurcations()
tree.write_to_path('test_no_internal_nodes.tree', 'nexus')

In [223]:
# Scratch check: number of successes in 100 independent Bernoulli(0.8) draws.
sum(bool(np.random.binomial(1, 0.8)) for _ in range(100))

78

In [224]:
# Rows whose location (column position 2) equals the human location code.
is_human_row = tree_reaction_location_age.iloc[:, 2] == location_human
tree_reaction_location_age.loc[is_human_row]

Unnamed: 0,0,1,2,3
2,3_1,Removed,1,1.927923
5,6_1,Removed,1,1.94393
13,14_1,Removed,1,1.979502
17,18_1,Sampling,1,1.976157
18,19_1,Removed,1,1.918691
21,22_1,Sampling,1,1.904313
22,23_1,Sampling,1,1.890275
23,24_1,Removed,1,1.892277
33,34_1,Sampling,1,1.87446
34,35_1,Removed,1,1.727671


### Also need to get the following stats:
- Time of origin (age of root, really)
- Date of first human transmission
- number of migrations to human
- total number of species jumps (this to be compared with the number of monophyletic groups)


In [203]:
location_target = '1' # Location code of the tips to subsample. NOTE(review): was labelled "the bat", but location_human is also '1' in this notebook — confirm
location_trigger_sampling = '0' # Location code whose smallest age value sets the cutoff for sampling the target. NOTE(review): was labelled "after human sample" — confirm
sampling_prob = 0.1 # Per-tip probability used in the Bernoulli draw that subsamples the target tips

In [204]:
# Boolean mask: rows whose location (column position 2) is the trigger location.
trigger_location = tree_reaction_location_age.iloc[:, 2].eq(location_trigger_sampling)
# Column labelled 3 for those rows; its minimum is the cutoff used by the later cells.
subset_trigger_location = tree_reaction_location_age.loc[trigger_location, 3]
first_target = subset_trigger_location.min()
print(first_target)

1.2868389592339013


In [205]:
# Preview the first rows of the per-tip table.
tree_reaction_location_age.head()

Unnamed: 0,0,1,2,3
0,1,Sampling,0,1.73508
1,2,Sampling,0,2.038165
2,3,Sampling,0,1.856948
3,4,Sampling,0,1.876671
4,5,Sampling,1,1.867781


In [206]:
# Editing code up to here!!
# Row positions of the tips that are in the target location AND whose value in
# column position 3 is strictly greater than `first_target`.
#
# TODO (from the original notes): tips younger than the target should be
# EXCLUDED with probability 1 - samp_prob and tips older than the target
# always removed, producing the list of all tips to remove. Only the
# candidate selection is implemented here.
match_location = tree_reaction_location_age.iloc[:, 2] == location_target
match_age = tree_reaction_location_age.iloc[:, 3] > first_target

# np.flatnonzero always yields an integer array, so the result is a valid index
# even when nothing matches (np.array([]) on an empty list would be float64).
match_target_and_age = np.flatnonzero((match_location & match_age).values)

In [207]:
# One independent Bernoulli(sampling_prob) draw per candidate tip. `sampled` is
# the tuple returned by np.where: positions (within match_target_and_age) of
# the draws that came up 1.
# Note: no random seed is set in the visible cells, so this varies between runs.
sampled = np.where(np.random.binomial(1, sampling_prob, len(match_target_and_age)) == 1)

# Force an integer dtype so an empty selection is still a valid indexer.
sampled_rows = np.asarray(match_target_and_age[sampled], dtype=int)
print(sampled_rows)

# The candidates are row *positions*, so select with .iloc; .loc would read
# them as index labels and is only right for a default RangeIndex.
tree_reaction_location_age.iloc[sampled_rows]

[ 16  18  19  55  56  62  70  73 100 101]


Unnamed: 0,0,1,2,3
16,17,Sampling,1,2.11094
18,19,Sampling,1,2.079204
19,20,Removed,1,1.860565
55,56,Sampling,1,2.291279
56,57,Sampling,1,2.264211
62,63,Sampling,1,2.275008
70,71,Sampling,1,2.192449
73,74,Sampling,1,2.016424
100,101,Sampling,1,2.11367
101,102,Sampling,1,1.796743


In [208]:
# Show every candidate row position (before the random subsampling).
match_target_and_age

array([  4,   5,  14,  15,  16,  18,  19,  20,  21,  22,  23,  24,  25,
        26,  31,  32,  33,  38,  40,  43,  44,  52,  53,  55,  56,  57,
        59,  60,  61,  62,  65,  66,  67,  68,  69,  70,  71,  72,  73,
        75,  76,  77,  78,  79,  86,  87,  88,  91,  94,  95,  96,  97,
        98,  99, 100, 101, 102, 105, 108, 123, 124, 125])

In [209]:
# Scratch check: keep each of the integers 0..99 with probability 0.05.
np.arange(100)[np.random.binomial(1, 0.05, 100) == 1]

array([24, 27, 30, 56, 74, 91, 96])

In [106]:
# Scratch arithmetic.
1.8-0.9

0.9