# Notebook 4b: tetrad species tree inference

Infer a species tree using the SVDquartets algorithm implemented in tetrad.

In [5]:
import ipyrad.analysis as ipa
import numpy as np
import pandas as pd
import toytree

### Datafiles

In [6]:
# Paths to the .seqs and .snps HDF5 files in the Strept_min4 ipyrad outfiles
# directory. Only SNPS is passed to tetrad in the cells shown in this notebook.
SEQS, SNPS = (
    "../analysis-ipyrad/Strept_min4_outfiles/Strept_min4.seqs.hdf5",
    "../analysis-ipyrad/Strept_min4_outfiles/Strept_min4.snps.hdf5",
)

In [7]:
# Load the sample master sheet.
df = pd.read_csv("../data_sample_info/mastersheetSamples_RADstrep_20191017.csv")

# Collect the `tip_label_ivalu` values of every row whose `4balanced` flag is 1.
# NOTE(review): `keep` is not referenced by the other cells visible in this
# notebook -- confirm where (or whether) it is consumed.
keep = dict(taxa=df.loc[df["4balanced"] == 1, "tip_label_ivalu"].tolist())

# Display how many samples were selected.
len(keep["taxa"])

48

### Infer a tetrad tree

In [3]:
# Set up the tetrad analysis on the SNP database.
# nquartets=2e6 subsamples the 4,421,275 possible quartets of the 103 loaded
# taxa (see the sampler line in the cell output); nboots=100 requests 100
# bootstrap replicates.
# NOTE(review): the sampler is reported as [random] and no seed is passed
# here -- confirm whether a fixed seed is wanted for reproducibility.
tet = ipa.tetrad(
    name="strept-tetrad-backbone",
    workdir="../analysis-tetrad",
    data=SNPS,
    nboots=100,
    nquartets=2e6,
)

loading snps array [103 taxa x 711484 snps]
max unlinked SNPs per quartet [nloci]: 39181
quartet sampler [random]: 2000000 / 4421275


In [4]:
# Request 38 cores for the parallel cluster, then launch the inference.
# The recorded output shows the full-tree step alone taking about 1h15m, so
# this cell is expensive to re-run.
# NOTE(review): force=True presumably overwrites results from an earlier run
# with the same name/workdir -- confirm before re-executing.
tet.ipcluster["cores"] = 38
tet.run(
    auto=True,
    force=True,
    show_cluster=True,
)

Parallel connection | sacra: 38 cores
initializing quartet sets database
[####################] 100% 1:15:01 | full tree * | mean SNPs/qrt: 753  
[#################   ]  86% 1:15:19 | boot rep. 1 | mean SNPs/qrt: 754  
Keyboard Interrupt by user

Parallel connection closed.


### Draw the tree

In [11]:
# Load the tree, root it on the A. thaliana sample, then prune the outgroup
# and reference tips so that they are not drawn.
# NOTE(review): the tetrad run in this notebook is named
# "strept-tetrad-backbone", but the file loaded here is
# "strept-min4-tetrad.tree" -- confirm this is the intended tree.
tre = (
    toytree.tree("../analysis-tetrad/strept-min4-tetrad.tree")
    .root("A_thaliana_TAIR10")
    .drop_tips(["A_thaliana_TAIR10", "Sy_irio_NJ_3877", "reference", "S_irio"])
)

# Draw the pruned tree with aligned tip labels. Clade coloring and support
# labels are currently disabled (the options are kept below, commented out).
canvas, axes, mark = tre.draw(
    width=500,
    height=1200,
    tip_labels_align=True,
    # edge_colors=tre.get_edge_values_mapped({160,161,168,174,178,181,192,195}),
    # node_labels="support",
    # node_labels_style={"-toyplot-anchor-shift": "-10px", "baseline-shift": "-10px"},
);