In [1]:
import numpy as np
import toytree
import pandas as pd
import toyplot
import ipcoal
# Reversed "BlueRed" Brewer colormap; used further down to color tree nodes
# by their reconstructed generation-time ("g") values.
colormap = toyplot.color.brewer.map("BlueRed", reverse=True)

### names from tree

In [2]:
# Load the mammal tree from the local data directory (an MCC tree, judging by
# the file name).
# NOTE(review): tree_format=10 is passed straight through to toytree.tree; what
# that format code selects is not visible here — confirm against toytree docs.
tree = toytree.tree("mammal_dat/MamPhy_fullPosterior_BDvr_DNAonly_4098sp_topoFree_NDexp_MCC_v2_target.tre",
                       tree_format=10)

In [3]:
# Tip labels carry extra "_"-separated fields after the binomial; keep only
# the first two fields so each name reads "Genus_species".
trenames = np.array(
    ["_".join(label.split("_")[:2]) for label in tree.get_tip_labels()]
)

### names from PanTHERIA

In [4]:
# PanTHERIA trait table (tab-delimited text file).
panth = pd.read_csv(
    './mammal_dat/PanTHERIA_1-0_WR05_Aug2008.txt',
    sep='\t',
)

In [5]:
panth.head()

Unnamed: 0,MSW05_Order,MSW05_Family,MSW05_Genus,MSW05_Species,MSW05_Binomial,1-1_ActivityCycle,5-1_AdultBodyMass_g,8-1_AdultForearmLen_mm,13-1_AdultHeadBodyLen_mm,2-1_AgeatEyeOpening_d,...,26-6_GR_MinLong_dd,26-7_GR_MidRangeLong_dd,27-1_HuPopDen_Min_n/km2,27-2_HuPopDen_Mean_n/km2,27-3_HuPopDen_5p_n/km2,27-4_HuPopDen_Change,28-1_Precip_Mean_mm,28-2_Temp_Mean_01degC,30-1_AET_Mean_mm,30-2_PET_Mean_mm
0,Artiodactyla,Camelidae,Camelus,dromedarius,Camelus dromedarius,3.0,492714.47,-999.0,-999.0,-999.0,...,-999.0,-999.0,-999,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0
1,Carnivora,Canidae,Canis,adustus,Canis adustus,1.0,10392.49,-999.0,745.32,-999.0,...,-17.53,13.0,0,35.2,1.0,0.14,90.75,236.51,922.9,1534.4
2,Carnivora,Canidae,Canis,aureus,Canis aureus,2.0,9658.7,-999.0,827.53,7.5,...,-17.05,45.74,0,79.29,0.0,0.1,44.61,217.23,438.02,1358.98
3,Carnivora,Canidae,Canis,latrans,Canis latrans,2.0,11989.1,-999.0,872.39,11.94,...,-168.12,-117.6,0,27.27,0.0,0.06,53.03,58.18,503.02,728.37
4,Carnivora,Canidae,Canis,lupus,Canis lupus,2.0,31756.51,-999.0,1055.0,14.01,...,-171.84,3.9,0,37.87,0.0,0.04,34.79,4.82,313.33,561.11


In [6]:
# Break every "Genus species" binomial into its space-separated parts.
panthnames = [binomial.split(" ") for binomial in panth['MSW05_Binomial']]

In [7]:
# Re-join the parts with underscores so the names match the tree's
# "Genus_species" form.
panthnames = np.array(list(map("_".join, panthnames)))

### names from the Nature Conservation generation-length dataset

In [8]:
# Generation-length table (comma-separated).
natcons = pd.read_csv(
    './mammal_dat/5734-SP-2-Editor.csv',
    sep=',',
)

In [9]:
natcons.head()

Unnamed: 0,TaxID,Order,Family,Genus,Scientific_name,AdultBodyMass_g,Sources_AdultBodyMass,Max_longevity_d,Sources_Max_longevity,Rspan_d,AFR_d,Data_AFR,Calculated_GL_d,GenerationLength_d,Sources_GL
0,42641,Rodentia,Muridae,Abditomys,Abditomys latidens,268.09,PanTHERIA,no information,no information,no information,no information,no information,no information,639.631832,Mean_family_same_body_mass
1,17879,Rodentia,Muridae,Abeomelomys,Abeomelomys sevia,54.88,PanTHERIA,no information,no information,no information,no information,no information,no information,624.399641,Mean_family_same_body_mass
2,16,Rodentia,Cricetidae,Abrawayaomys,Abrawayaomys ruschii,62.99,PanTHERIA,no information,no information,no information,no information,no information,no information,589.388299,Mean_family_same_body_mass
3,42656,Rodentia,Abrocomidae,Abrocoma,Abrocoma bennettii,250.5,PanTHERIA,839.5,PanTHERIA;AnAge,no information,no information,no information,no information,1032.923574,Mean_order_same_mass
4,18,Rodentia,Abrocomidae,Abrocoma,Abrocoma boliviensis,158.0,PanTHERIA,no information,no information,no information,no information,no information,no information,1032.923574,Mean_order_same_mass


In [10]:
# Swap spaces for underscores so the names match the tree's "Genus_species" form.
natconsnames = np.array(
    [sci_name.replace(" ", "_") for sci_name in natcons['Scientific_name']]
)

### now see which names are common to all lists

In [11]:
# Keep the tree names (in tree order, duplicates included) that also occur in
# both trait tables. Membership is tested against sets built once: `in` on a
# numpy array is a linear scan, which made the original loop quadratic.
panth_name_set = set(panthnames)
natcons_name_set = set(natconsnames)
intersection_names = [
    trename
    for trename in trenames
    if trename in panth_name_set and trename in natcons_name_set
]

In [12]:
len(intersection_names)

3489

In [13]:
# One-column frame holding the species shared by all three sources.
intersect_df = pd.DataFrame(
    {'scientific_name': intersection_names},
)

In [14]:
intersect_df.head()

Unnamed: 0,scientific_name
0,Akodon_boliviensis
1,Akodon_spegazzinii
2,Akodon_sylvanus
3,Akodon_lutescens
4,Akodon_subfuscus


In [15]:
# Save the shared species names (no index column).
intersect_df.to_csv(
    'mammal_dat/intersection_names.csv',
    index=False,
)

### now gather the data:

In [16]:
# The shared names use "Genus_species"; both trait tables key their rows by
# the space-separated binomial, so convert once up front.
binomials = [name.replace("_", " ") for name in intersect_df['scientific_name']]

# Mean generation length per species, computed in a single grouped pass
# instead of one full-column scan per species. A mean is used because some
# entries have multiple generation lengths (from different datasets).
gl_by_species = natcons.groupby('Scientific_name')['GenerationLength_d'].mean()

# -999 marks a missing value in PanTHERIA (rows carrying it are filtered out
# in the following step). Mask the sentinel before averaging so it can never
# be blended into real range areas, then restore it for species that are left
# with no usable record so that downstream filter keeps working.
area_values = panth['26-1_GR_Area_km2']
area_by_species = (
    area_values.where(area_values != -999)
    .groupby(panth['MSW05_Binomial'])
    .mean()
    .fillna(-999)
)

# Column vectors of shape (n_species, 1), ordered like intersect_df.
gl_dat = gl_by_species.reindex(binomials).to_numpy(dtype=float).reshape(-1, 1)
sp_area_dat = area_by_species.reindex(binomials).to_numpy(dtype=float).reshape(-1, 1)

In [17]:
# Attach the gathered trait values as new columns (row order matches
# intersect_df, since both arrays were filled by iterating over it).
intersect_df = intersect_df.assign(
    generation_length=gl_dat,
    species_area=sp_area_dat,
)

In [18]:
intersect_df.head()

Unnamed: 0,scientific_name,generation_length,species_area
0,Akodon_boliviensis,589.388299,530195.66
1,Akodon_spegazzinii,589.388299,83477.58
2,Akodon_sylvanus,589.388299,48393.63
3,Akodon_lutescens,589.388299,318909.65
4,Akodon_subfuscus,589.388299,353423.95


In [19]:
# Discard species whose range area is the -999 missing-value sentinel.
intersect_df = intersect_df[intersect_df['species_area'] != -999]

In [20]:
# Save the filtered trait table (no index column).
intersect_df.to_csv(
    'mammal_dat/mammal_dat.csv',
    index=False,
)

### now trim the tree down to just the tips we want, and write that out as a new file:

In [21]:
# Reload the filtered trait table written above; this also gives the frame a
# fresh 0..n-1 index (the file was saved with index=False).
intersect_df = pd.read_csv("mammal_dat/mammal_dat.csv")

In [22]:
intersect_df.head()

Unnamed: 0,scientific_name,generation_length,species_area
0,Akodon_boliviensis,589.388299,530195.66
1,Akodon_spegazzinii,589.388299,83477.58
2,Akodon_sylvanus,589.388299,48393.63
3,Akodon_lutescens,589.388299,318909.65
4,Akodon_subfuscus,589.388299,353423.95


In [23]:
# Full tip labels of the tree, as returned by toytree.
tips = tree.get_tip_labels()

In [24]:
# Species retained in the trait table. Built once as a set: the original
# rebuilt a numpy array of these names and scanned it linearly for every tip.
kept_species = set(intersect_df.scientific_name)

# True for tips whose "Genus_species" prefix has trait data.
has_trait_data = np.array(
    ["_".join(tip.split("_")[:2]) in kept_species for tip in tips],
    dtype=bool,
)

# Full tip labels that have no trait data and must be removed from the tree.
dropped_tips = np.array(tips)[~has_trait_data]

In [25]:
# Rebind `tree` to the result of dropping every tip without trait data.
# Note: this overwrites the full tree loaded earlier, so re-running this cell
# alone operates on the already-trimmed tree.
tree = tree.drop_tips(list(dropped_tips))

In [26]:
# Write the trimmed tree to disk; it is read back from this path in the
# empirical-example section below.
tree.write("mammal_dat/tipdrop_tree.newick")

### now let's actually code up the empirical example:

In [27]:
# Reload the saved trait table from disk and preview the first rows.
intersect_df = pd.read_csv("mammal_dat/mammal_dat.csv")
intersect_df.head()

Unnamed: 0,scientific_name,generation_length,species_area
0,Akodon_boliviensis,589.388299,530195.66
1,Akodon_spegazzinii,589.388299,83477.58
2,Akodon_sylvanus,589.388299,48393.63
3,Akodon_lutescens,589.388299,318909.65
4,Akodon_subfuscus,589.388299,353423.95


In [28]:
# Reload the trimmed tree written earlier (only tips that have trait data).
tree = toytree.tree("mammal_dat/tipdrop_tree.newick")

In [29]:
len(tree.get_tip_labels())

3121

In [30]:
intersect_df.shape[0]

3121

In [31]:
# Lower and upper bounds of the Ne interval that species areas are rescaled
# onto, and the width of that interval.
min_Ne, max_Ne = 50_000, 150_000
Ne_range = max_Ne - min_Ne

In [32]:
# Observed extremes of species range area and the span between them.
min_area = intersect_df['species_area'].min()
max_area = intersect_df['species_area'].max()
area_range = max_area - min_area

In [33]:
# Min-max rescale range area to [0, 1], then map it linearly onto
# [min_Ne, max_Ne].
scaled_area = (intersect_df['species_area'].to_numpy() - min_area) / area_range
intersect_df['Ne'] = (scaled_area * Ne_range) + min_Ne

In [34]:
intersect_df.head()

Unnamed: 0,scientific_name,generation_length,species_area,Ne
0,Akodon_boliviensis,589.388299,530195.66,50841.12241
1,Akodon_spegazzinii,589.388299,83477.58,50132.431984
2,Akodon_sylvanus,589.388299,48393.63,50076.773481
3,Akodon_lutescens,589.388299,318909.65,50505.930308
4,Akodon_subfuscus,589.388299,353423.95,50560.685096


### set tip values on the tree:

In [35]:
# Map each "Genus_species" prefix to the FIRST full tip label that carries it
# (the same first-match choice np.argmax made). Building the lookup once
# avoids re-deriving every tip prefix for every species.
tip_label_by_species = {}
for full_label in tree.get_tip_labels():
    tip_label_by_species.setdefault("_".join(full_label.split("_")[:2]), full_label)

# argmax over an all-False mask returns 0, so an unmatched species used to be
# given the first tip's label silently. Fail loudly instead.
unmatched_species = [
    name for name in intersect_df.scientific_name if name not in tip_label_by_species
]
if unmatched_species:
    raise ValueError(
        "species in the trait table have no matching tip in the tree: "
        + ", ".join(map(str, unmatched_species[:10]))
    )

intersect_df["tip_label"] = [
    tip_label_by_species[name] for name in intersect_df.scientific_name
]

In [36]:
intersect_df.head()

Unnamed: 0,scientific_name,generation_length,species_area,Ne,tip_label
0,Akodon_boliviensis,589.388299,530195.66,50841.12241,Akodon_boliviensis_CRICE...
1,Akodon_spegazzinii,589.388299,83477.58,50132.431984,Akodon_spegazzinii_CRICE...
2,Akodon_sylvanus,589.388299,48393.63,50076.773481,Akodon_sylvanus_CRICETID...
3,Akodon_lutescens,589.388299,318909.65,50505.930308,Akodon_lutescens_CRICETI...
4,Akodon_subfuscus,589.388299,353423.95,50560.685096,Akodon_subfuscus_CRICETI...


In [37]:
# Per-tip lookup tables keyed by full tip label: effective population size
# and generation length.
tip_keys = intersect_df['tip_label'].to_numpy()
ne_dict = dict(zip(tip_keys, intersect_df['Ne'].to_numpy()))
g_dict = dict(zip(tip_keys, intersect_df['generation_length'].to_numpy()))

In [38]:
# Store generation length ("g") and effective population size ("Ne") on the
# tree, keyed by tip label. Only tips appear in the dicts.
# NOTE(review): what value internal nodes receive at this point depends on
# toytree's default for set_node_values — confirm.
tree = tree.set_node_values("g",g_dict)
tree = tree.set_node_values("Ne",ne_dict)

### ancestral state reconstruction:

In [39]:
# Reconstruct ancestral values of the "g" feature; the result is kept as
# `ntree`.
# NOTE(review): the reconstruction model used by toytree.PCM is not visible
# here — confirm it is appropriate for generation length.
recon_tree = toytree.PCM.PCM(tree)
ntree = recon_tree.ancestral_state_reconstruction("g")

In [40]:
# Repeat the reconstruction for "Ne", starting from the tree that already
# carries the reconstructed "g" values, and rebind `ntree` to the result.
recon_tree = toytree.PCM.PCM(ntree)
ntree = recon_tree.ancestral_state_reconstruction("Ne")

### look at the tree with proper Ne values and g values:

In [41]:
# Node index of the most recent common ancestor of the 50 Rhinolophus tips
# listed below; this clade is the subtree used for the simulation example.
mrca = ntree.get_mrca_idx_from_tip_labels(['Rhinolophus_landeri_RHINOLOPHIDAE_CHIROPTERA',
 'Rhinolophus_alcyone_RHINOLOPHIDAE_CHIROPTERA',
 'Rhinolophus_hipposideros_RHINOLOPHIDAE_CHIROPTERA',
 'Rhinolophus_simulator_RHINOLOPHIDAE_CHIROPTERA',
 'Rhinolophus_denti_RHINOLOPHIDAE_CHIROPTERA',
 'Rhinolophus_swinnyi_RHINOLOPHIDAE_CHIROPTERA',
 'Rhinolophus_capensis_RHINOLOPHIDAE_CHIROPTERA',
 'Rhinolophus_blasii_RHINOLOPHIDAE_CHIROPTERA',
 'Rhinolophus_euryale_RHINOLOPHIDAE_CHIROPTERA',
 'Rhinolophus_mehelyi_RHINOLOPHIDAE_CHIROPTERA',
 'Rhinolophus_ferrumequinum_RHINOLOPHIDAE_CHIROPTERA',
 'Rhinolophus_clivosus_RHINOLOPHIDAE_CHIROPTERA',
 'Rhinolophus_maclaudi_RHINOLOPHIDAE_CHIROPTERA',
 'Rhinolophus_ruwenzorii_RHINOLOPHIDAE_CHIROPTERA',
 'Rhinolophus_darlingi_RHINOLOPHIDAE_CHIROPTERA',
 'Rhinolophus_fumigatus_RHINOLOPHIDAE_CHIROPTERA',
 'Rhinolophus_eloquens_RHINOLOPHIDAE_CHIROPTERA',
 'Rhinolophus_formosae_RHINOLOPHIDAE_CHIROPTERA',
 'Rhinolophus_luctus_RHINOLOPHIDAE_CHIROPTERA',
 'Rhinolophus_beddomei_RHINOLOPHIDAE_CHIROPTERA',
 'Rhinolophus_sedulus_RHINOLOPHIDAE_CHIROPTERA',
 'Rhinolophus_trifoliatus_RHINOLOPHIDAE_CHIROPTERA',
 'Rhinolophus_pearsonii_RHINOLOPHIDAE_CHIROPTERA',
 'Rhinolophus_yunanensis_RHINOLOPHIDAE_CHIROPTERA',
 'Rhinolophus_affinis_RHINOLOPHIDAE_CHIROPTERA',
 'Rhinolophus_stheno_RHINOLOPHIDAE_CHIROPTERA',
 'Rhinolophus_coelophyllus_RHINOLOPHIDAE_CHIROPTERA',
 'Rhinolophus_shameli_RHINOLOPHIDAE_CHIROPTERA',
 'Rhinolophus_arcuatus_RHINOLOPHIDAE_CHIROPTERA',
 'Rhinolophus_euryotis_RHINOLOPHIDAE_CHIROPTERA',
 'Rhinolophus_subrufus_RHINOLOPHIDAE_CHIROPTERA',
 'Rhinolophus_creaghi_RHINOLOPHIDAE_CHIROPTERA',
 'Rhinolophus_rouxii_RHINOLOPHIDAE_CHIROPTERA',
 'Rhinolophus_thomasi_RHINOLOPHIDAE_CHIROPTERA',
 'Rhinolophus_sinicus_RHINOLOPHIDAE_CHIROPTERA',
 'Rhinolophus_malayanus_RHINOLOPHIDAE_CHIROPTERA',
 'Rhinolophus_acuminatus_RHINOLOPHIDAE_CHIROPTERA',
 'Rhinolophus_marshalli_RHINOLOPHIDAE_CHIROPTERA',
 'Rhinolophus_paradoxolophus_RHINOLOPHIDAE_CHIROPTERA',
 'Rhinolophus_rex_RHINOLOPHIDAE_CHIROPTERA',
 'Rhinolophus_macrotis_RHINOLOPHIDAE_CHIROPTERA',
 'Rhinolophus_cognatus_RHINOLOPHIDAE_CHIROPTERA',
 'Rhinolophus_lepidus_RHINOLOPHIDAE_CHIROPTERA',
 'Rhinolophus_pusillus_RHINOLOPHIDAE_CHIROPTERA',
 'Rhinolophus_borneensis_RHINOLOPHIDAE_CHIROPTERA',
 'Rhinolophus_robinsoni_RHINOLOPHIDAE_CHIROPTERA',
 'Rhinolophus_celebensis_RHINOLOPHIDAE_CHIROPTERA',
 'Rhinolophus_virgo_RHINOLOPHIDAE_CHIROPTERA',
 'Rhinolophus_philippinensis_RHINOLOPHIDAE_CHIROPTERA',
 'Rhinolophus_megaphyllus_RHINOLOPHIDAE_CHIROPTERA'])

In [42]:
# Fetch the node object whose idx equals the MRCA index; search_nodes returns
# a list, of which the first match is taken.
stree = ntree.treenode.search_nodes(idx=mrca)[0]

In [43]:
len(stree.get_descendants())

98

In [44]:
# Prune the underlying node structure down to the MRCA's descendants. The
# call is made for its side effect (the return value is discarded), so
# `ntree` itself is modified; afterwards ntree.ntips reports 50.
ntree.treenode.prune(stree.get_descendants())

In [45]:
ntree.ntips

50

In [46]:
# Wrap the pruned node structure in a fresh toytree object for plotting and
# simulation.
# NOTE(review): detach() is called on ntree's root node — presumably it
# returns that node so it can seed the new tree; confirm.
s_plot = toytree.tree(ntree.treenode.detach())

In [47]:
# Rescale the tree so its root height is 1e6 times the current height.
# NOTE(review): presumably this converts branch lengths from millions of
# years to years — confirm the units of the source tree.
s_plot = s_plot.mod.node_scale_root_height(s_plot.treenode.height*1e6)

In [48]:
s_plot.treenode.height

14572800.0

In [49]:
# Draw the subtree with every node colored by its reconstructed "g" value.
# The node values and their extremes are fetched once up front; previously
# they were re-queried and re-reduced for every single node.
g_node_values = s_plot.get_node_values('g', 1, 1)
g_min = np.min(g_node_values)
g_max = np.max(g_node_values)

s_plot.draw(
    ts='p',
    node_colors=[colormap.colors(g_val, g_min, g_max) for g_val in g_node_values],
);

### adjust edge lengths to account for generation times:

In [50]:
# Rescale each node's branch length by its generation time: dist is divided
# by (g / 365).
# NOTE(review): assumes dist is in years (after the 1e6 rescale above) and g
# is in days, which would make the new edge lengths generations — confirm.
# NOTE(review): values are keyed by node name; this relies on every node
# yielded by get_feature_dict() having a unique name — verify for internal
# nodes.
ttree = s_plot.set_node_values(
    "dist",
    {i.name: i.dist / (i.g/365) for i in s_plot.get_feature_dict()}
)

In [51]:
ttree.draw(ts='p');

### simulate:

In [52]:
# Build an ipcoal model on the generation-scaled tree with a fixed seed, then
# simulate genealogies for 5 loci of 1e5 sites each.
# NOTE(review): nsites is given as a float (1e5); confirm ipcoal accepts that
# or expects an int.
mod = ipcoal.Model(ttree, seed=333)
mod.sim_trees(nloci=5, nsites=1e5)
# Table of simulated genealogies: one row per genealogy interval within a locus.
mod.df

Unnamed: 0,locus,start,end,nbps,nsnps,tidx,genealogy
0,0,0,229,229,0,0,(((Rhinolophus_formosae_...
1,0,229,272,43,0,1,(((Rhinolophus_formosae_...
2,0,272,334,62,0,2,(((Rhinolophus_formosae_...
3,0,334,601,267,0,3,((((Rhinolophus_capensis...
4,0,601,614,13,0,4,((((Rhinolophus_capensis...
...,...,...,...,...,...,...,...
5686,4,99325,99694,369,0,1132,((Rhinolophus_hipposider...
5687,4,99694,99739,45,0,1133,((Rhinolophus_hipposider...
5688,4,99739,99790,51,0,1134,((Rhinolophus_hipposider...
5689,4,99790,99904,114,0,1135,((Rhinolophus_hipposider...


### results:

In [53]:
# Draw linked genealogies: the genealogy column is passed in table order, so
# the grid starts with consecutive genealogies along the first locus.
toytree.mtree(mod.df.genealogy).draw_tree_grid(tip_labels=False);

In [54]:
# Draw unlinked genealogies: keep only rows with tidx == 0, i.e. the first
# genealogy of each locus, so every tree shown comes from a different locus.
toytree.mtree(mod.df[mod.df.tidx==0].genealogy).draw_tree_grid(tip_labels=False);