# TSINFER tutorial

In [None]:
import string
import numpy as np
import tsinfer
from tskit import MISSING_DATA

# Example dataset: six sites at positions 0..5, each with genotypes for five samples.
# Each row is (position, genotypes, alleles, ancestral allele).
example_sites = [
    (0, [0, 1, 0, 0, 0], ["A", "T"], 0),
    (1, [0, 0, 0, 1, 1], ["G", "C"], 0),
    (2, [0, 1, 1, 0, 0], ["C", "A"], 0),
    # The ancestral allele of this site is passed as tskit's MISSING_DATA marker.
    (3, [0, 1, 1, 0, 0], ["G", "C"], MISSING_DATA),
    (4, [0, 0, 0, 1, 1], ["A", "C"], 0),
    # The only site in this example with three alleles.
    (5, [0, 1, 2, 0, 0], ["T", "G", "C"], 0),
]

with tsinfer.SampleData(sequence_length=6) as sample_data:
    # Sites are added in the order listed above (increasing position).
    for pos, gts, site_alleles, anc in example_sites:
        sample_data.add_site(pos, gts, site_alleles, ancestral_allele=anc)

In [None]:
# Run tsinfer's inference on the sample data built above; the result is bound to
# `ts` and used as a tree sequence by the cells that follow.
ts = tsinfer.infer(sample_data)

In [None]:
# A bare name as the last expression of a cell: the notebook displays its
# representation of the inferred result.
ts

In [None]:
# Summarise the first tree of the inferred tree sequence, then plot it.
first_tree = ts.first()
branch_length = first_tree.total_branch_length
tree_count = ts.num_trees
print("Total branch length in first tree is", branch_length, ts.time_units)
print("The first of", tree_count, "trees is plotted below")
# Drawing is kept as the cell's final expression so the notebook renders the SVG;
# plotting a single tree like this is only useful for small trees.
first_tree.draw_svg(y_axis=True)

In [None]:
# Extra code to label and order the tips alphabetically rather than numerically.
# Every node first gets a placeholder letter keyed by its ID. Indexing into
# string.ascii_lowercase means this only works for tree sequences of at most 26 nodes.
labels = {i: string.ascii_lowercase[i] for i in range(ts.num_nodes)}
# Sample nodes in the order they are visited by a minlex postorder traversal of the first tree.
genome_order = [n for n in ts.first().nodes(order="minlex_postorder") if ts.node(n).is_sample()]
# Re-letter the samples so that the first one visited is "a", the next "b", and so on.
labels.update({n: labels[i] for i, n in enumerate(genome_order)})
# CSS: hide symbols and labels of non-sample nodes; adjust mutation and y-axis tick label fonts.
style1 = (
    ".node:not(.sample) > .sym, .node:not(.sample) > .lab {visibility: hidden;}"
    ".mut {font-size: 12px} .y-axis .tick .lab {font-size: 85%}")
sz = (800, 250)  # size of the plot, slightly larger than the default

# The upper end of the time axis is the age of the oldest root. This is asked of the
# tree sequence directly instead of reading the time of the highest-numbered node,
# because node IDs are not guaranteed to be ordered by time.
max_time = ts.max_root_time
ticks = np.linspace(0, max_time, 5)  # five evenly spaced ticks from 0 to max_time
ts.draw_svg(
    size=sz, node_labels=labels, style=style1, y_label="Time ago",
    y_axis=True, y_ticks=ticks)