In [None]:
import collections

import sc2ts
import numpy as np
import tskit
import tszip
import numpy as np
from IPython.display import HTML
import warnings

import nb_utils

# NB - tag the cells with progressbars with `remove_cell`, and export only the output cells to PDF via:
# jupyter nbconvert --to webpdf --no-prompt --no-input --PDFExporter.scale_factor=0.8  --TagRemovePreprocessor.remove_cell_tags='{"remove_cell"}' --PDFExporter.margin_left=0.2cm --PDFExporter.margin_right=0.2cm Viridian-PangoX.ipynb
# A fancier option is to use the src/makepdf.py script, which creates hover-over labels.
# Here's how to use that to save Viridian-PangoX.pdf to the `figures` directory:
#  python src/makepdf.py notebooks/Viridian-PangoX.ipynb figures

from IPython.display import HTML
# Emit a CSS @page rule giving the paged (PDF/print) output a 0.5cm margin.
HTML("""<style>@page {margin: 0.5cm;}</style>""")  # Allow space for copying patterns in the pdf

In [None]:
# Load the Viridian ARG
ts = nb_utils.load()

# Node data indexed by sample ID, joined with the pangolin designations
# that Viridian provides in the dataset metadata
df = sc2ts.node_data(ts)
ds = nb_utils.load_dataset()
df = df.set_index("sample_id").join(ds.metadata.as_dataframe(["Viridian_pangolin"]))

# Set to `True` and rerun the notebook to get a nicer version for PDF output
hide_progress = True

In [None]:
# Set which pango designation to use (this is the name of the `df` column read by the cells below):
# Use "pango" to get the pango designations for all nodes computed by postprocessing the ARG.
# Use "Viridian_pangolin" to use the sample designations provided by Viridian.
Pango = "pango"

In [None]:
# Sample rows whose lineage name starts with "X"
dfX = df[np.logical_and(df.is_sample, df[Pango].str.startswith("X"))]
# Lineage name -> list of the node IDs of its samples
pango_lineage_samples = (
    df[df.is_sample]
    .groupby(Pango)["node_id"]
    .apply(list)
    .to_dict()
)
pangoFullX = np.unique(dfX[Pango])
# Names containing a "." go in the sub-lineage list; the rest are the main lineages
pangoX = [name for name in pangoFullX if "." not in name]
pangoSubX = [name for name in pangoFullX if "." in name]
display(HTML("".join([
    '<table>',
    f'<tr><th>{len(pangoX)} main pango-X lineages</th><th>{len(pangoSubX)} sub pango-X lineages</th></tr>',
    f'<tr><td>{", ".join(pangoX)}</td><td>{", ".join(pangoSubX)}</td></tr>',
    '</table>',
])))

In [None]:
# Note the provenance of the consensus-mutation data in the cell output, then read it
# from the bundled (bz2-compressed JSON) file.
print("Consensus mutations for each lineage taken from https://covidcg.org")
lineage_consensus_muts = nb_utils.read_in_mutations("../data/consensus_mutations.json.bz2")

In [None]:
# Find the most recent RE (recombination) node above all samples of each main Pango X lineage.
from tqdm.auto import tqdm

# lineage -> (RE node ID, node time); stays (None, inf) if no RE node is found in any tree
MRC_RE = {pango: (None, np.inf) for pango in pangoX}
recombination_nodes = set(np.where(ts.nodes_flags & sc2ts.NODE_IS_RECOMBINANT)[0])
nodes_time = ts.nodes_time
# The sample lists do not change from tree to tree, so look them up once
# and drop any lineage without samples up front.
samples_by_pango = {x: pango_lineage_samples[x] for x in pangoX}
samples_by_pango = {x: s for x, s in samples_by_pango.items() if len(s) > 0}
for tree in ts.trees():
    for x, samples in samples_by_pango.items():
        u = samples[0] if len(samples) == 1 else tree.mrca(*samples)
        # Walk rootwards until we reach a recombination node or run out of ancestors.
        # NULL is tested *before* asking for the parent, so a NULL starting node
        # (no common ancestor in this tree) is skipped rather than passed to tree.parent().
        while u != tskit.NULL and u not in recombination_nodes:
            u = tree.parent(u)
        # Keep the youngest (smallest node time) RE node seen across all trees
        if u != tskit.NULL and nodes_time[u] < MRC_RE[x][1]:
            MRC_RE[x] = (u, nodes_time[u])

In [None]:
# Gather, per lineage, every sample that sits below its candidate RE node in any tree.
# This is a bit tedious, as we have to look at all samples in all trees
re_node_of = {pango: node for pango, (node, _) in MRC_RE.items() if node is not None}
samples = {pango: set() for pango in pangoX}
for tree in tqdm(ts.trees(), disable=hide_progress):
    for pango, re_node in re_node_of.items():
        samples[pango].update(tree.samples(re_node))

In [None]:
# Reverse lookup: sample node ID -> lineage name
sample_to_pango = {
    s: p
    for p, sample_ids in pango_lineage_samples.items()
    for s in sample_ids
}
# For each main X lineage, count the lineages of the samples found below its RE node
pango_counts = {pango: collections.Counter() for pango in pangoX}
for pango, sample_set in samples.items():
    pango_counts[pango].update(sample_to_pango[s] for s in sample_set)

# Seemingly missing from Viridian QCed data
for absent in (
    "XD", "XK", "XT", "XV",
    "XAB", "XAH", "XAK", "XAQ", "XAR", "XAT", "XAW", "XAY",
    "XBA", "XBC",
):
    pango_counts[absent] = None
# Others past XBH not added here

In [None]:
# Build an HTML table with one row per main Pango X lineage, describing the candidate
# recombination (RE) node found for it: the node's parents, its breakpoint(s), and the
# lineages of the samples that descend from it.
tot_pango_x_re = []  # RE nodes whose most common "X" descendant lineage is the lineage itself
pango_x_nodes = collections.defaultdict(set)  # RE node -> lineages treated as originating there
td = '<td style="padding: 0.5px 2px">'
th = '<th style="padding: 0.5px 2px; font-style: italic">'
# "smallest" is not a CSS font-size keyword (the declaration would be ignored); use "smaller"
html = (
    '<table style="font-size: 8pt; margin-left: auto; margin-right: auto;">'
    '<tr><td colspan="2" style="padding: 0.5px 0px; font-size: smaller">Bold = main pango</td></tr>'
)
html += f'<tr>{th}RE node</th>{th}{Pango}</th>{th}parents</th>{th}break@</th>{th}# descendants</th>{th}Most common</th></tr>'
# Order by name length, then alphabetically (so two-letter names precede three-letter ones)
for pango in sorted(pango_counts, key=lambda x: (len(x), x)):
    if len(pango_lineage_samples.get(pango, [])) == 0:
        # One merged style attribute: when an element repeats an attribute only the
        # first is used, so a second style="text-align: center" would be dropped.
        html += (
            f'<tr>{td}</td>{td}<i>{pango}</i></td>'
            '<td style="padding: 1px; text-align: center" colspan="2">not in dataset</td></tr>'
        )
    else:
        counts = pango_counts[pango]
        tot = counts.total()
        p = counts[pango]
        most_common_X = None
        # Treat the RE node as this lineage's origin only if the lineage makes up
        # more than 0.1% of the samples descending from that node
        is_recomb = (p > 0 and p/tot > 0.001)
        re_nd = ""
        pg = f'<s>{pango}</s>'  # struck through unless shown to be recombinant below
        par = ''
        breakpnts = ''
        if is_recomb:
            re_node = MRC_RE[pango][0]
            pango_x_nodes[re_node].add(pango)
            most_common_X = max([x for x in counts if x.startswith("X")], key=lambda x: counts[x])
            re_nd = str(re_node)
            pg = f'{pango}'
            # Parent node -> left coordinate of the edge joining it to the RE node
            parent_left = {
                ts.edges_parent[e]: ts.edges_left[e]
                for e in np.where(ts.edges_child == re_node)[0]
            }
            # Breakpoints are the edge starts other than the two genome ends. Sort them
            # numerically: joining a set of strings directly gives an arbitrary order.
            inner_lefts = {int(v) for v in parent_left.values()} - {0, int(ts.sequence_length)}
            breakpnts = ", ".join(str(v) for v in sorted(inner_lefts))
            # Parent lineages listed left-to-right along the genome
            par = "/".join([
                pang
                for p_id in sorted(parent_left, key=parent_left.get)
                for pang in df.loc[df.node_id == p_id, Pango]
            ])
            if most_common_X == pango:
                # Bold = this lineage is the dominant X lineage below its RE node
                tot_pango_x_re.append(re_node)
                re_nd = f'<b>{re_nd}</b>'
                pg = f'<b>{pango}</b>'
        html += (
            f'<tr>{td}{re_nd}</td>{td}{pg}</td>{td}{par}</td>{td}{breakpnts}</td>{td}{tot} of which {p} {pango}</td>'
            f'{td}{", ".join([p + ": " + str(c) for p, c in counts.most_common(3)])}</td></tr>'
        )
html += "</table>"
display(HTML(html))
print(len(pango_x_nodes),
      "total pango X recombinant origins of which",
      len(tot_pango_x_re),
      "include all descendants of the dominant group (exceptions: XM and XBB)")
#print("Exceptions = RE nodes:", set(pango_x_nodes.keys()) - set(tot_pango_x_re))
#print("RE node for Pangos", pango_x_nodes)

In [None]:
# Load in the ARG to the visualizer, using the lineage column selected by `Pango`.
# Progress reporting is switched off whenever `hide_progress` is set.
arg = nb_utils.D3ARG_viz(ts, df, lineage_consensus_muts, pangolin_field=Pango, progress=not hide_progress)

In [None]:
arg.set_sc2ts_node_labels(progress=not hide_progress)
# Override the generated label of a few hand-picked nodes (node ID -> label)
for node_id, label in {
    200039: "DELTA-origin",
    822854: "BA.2-origin",
    1189192: "BA.5-origin",
}.items():
    arg.d3arg.nodes.loc[arg.d3arg.nodes.id == node_id, 'label'] = label
arg.set_sc2ts_node_styles()

# Pango-X Subgraphs

Below we display subgraphs for all the main PangoX lineages that have samples present in the _sc2ts_ ARG. Pango designations for both samples and internal nodes were assigned using XX TODO: fill out details XXX. For nodes with large numbers of descendants, only a selected sample of (say) 20-50 Pango X samples are shown. Extra descendants of a node are shown with dotted lines indicating additional immediate children of a node. In some cases, additional descendant nodes of different Pango designations (e.g. BA.2) are shown for context.

Recombination nodes are presented as larger circular nodes, with a Pango designation followed by the breakpoint position(s) surrounded by slashes, e.g. a breakpoint at position 1234 bp is indicated as **/1234/** (but note that PangoX lineages that are not of recombinant origin in sc2ts will not have a clear recombination node). Mutations within each subgraph (tickmarks along edges) are coloured pink if they are flagged as consensus mutations for those lineages: often such mutations occur in lineages above the PangoX origination node. Alternatively, if there are multiple mutations at the same site within a subgraph (indicating reversions or recurrent mutations) they are plotted in a unique colour. For example, two green mutation tickmarks will represent mutations at the same site. If one is a reversion of a previous mutation (often indicating an unparsimonious reconstruction of topology), then the mutation is emphasised with a solid black outline. Deletion mutations are filled in black, and reversions of deletions (expected not to happen spontaneously) are magenta with a black outline.

In the PDF version of this document, hovering over node names will reveal the sample_id of a node, and hovering over a mutation will reveal the position of the mutation and the inherited vs derived state. E.g. a mouseover label of <code>mut:A1234T</code> denotes a mutation from an A to a T at position 1234 in the genome. Technically this is implemented by faking a URL (this leads to the slightly annoying behaviour that actually clicking on the hover-over text will attempt to open a non-existent URL).

In [None]:
# Scale all the viz versions for print, so that a standard 750 x 1000 subgraph fits onto one side of A4.
# The rule only applies inside `@media print`; the trailing `;` suppresses the cell's output repr.
display(HTML("<style>@media print {.d3arg {zoom: 0.8}}</style>"));
def txt(html, right="15em", top="15em", width="275px"):
    """
    Wrap ``html`` in an absolutely positioned, black-bordered ``<div>``.

    ``right``, ``top`` and ``width`` are CSS lengths inserted verbatim into the
    div's inline style. ``html`` is inserted as-is (it is not escaped).
    Returns the resulting HTML string.
    """
    style = (
        f"position: absolute; z-index:1; right:{right}; top:{top}; width:{width};"
        "border:1px solid black; padding: 0.5em;"
    )
    return f'<div style="{style}">{html}</div>'
def issue(issue_number):
    """Return an HTML snippet linking to the given sc2ts-paper GitHub issue number."""
    url = f"https://github.com/jeromekelleher/sc2ts-paper/issues/{issue_number}"
    return f'See GitHub sc2ts-paper <a href="{url}">issue #{issue_number}</a>'
class RecordPango:
    """
    Pass-through recorder: remembers every lineage name handed to ``pango()``.

    The recorded names are kept, in call order, in the ``pangos`` list.
    """
    def __init__(self):
        # Per-instance list: a class-level list would be shared by every instance
        self.pangos = []

    def pango(self, pango_string):
        """
        Record ``pango_string`` and return it unchanged.

        ``pango_string`` is either a single lineage name or an iterable of names;
        an iterable has each of its items recorded separately.
        """
        # isinstance (rather than an exact type comparison) so that str subclasses
        # are recorded whole instead of being split into characters
        if isinstance(pango_string, str):
            self.pangos.append(pango_string)
        else:
            self.pangos.extend(pango_string)
        return pango_string
rec = RecordPango()

In [None]:
# XA: annotation text displayed in a box over the subgraph
html = '''<p>XA traces to a very clean recombination node, with no important reversions etc.
Note that the causal sample (which triggers the initial recombination) is very close to the recombination
event, and therefore lacks C8090T which is one of the "XA consensus" mutations (identified as those shared by
over 90% of XA samples).</p>'''

arg.plot_pango_subgraph(
    rec.pango("XA"),
    txt(html, top="27.5em", right="15em"),
    parent_pangos=("B.1.1.7", "B.1.177.18"),
    y_axis_scale="rank",
)

In [None]:
# XB has too many samples so we remove the immediate children of the XB root
# (node 223239), keeping only child 223230. setdiff1d returns the sorted unique
# children that are not in the second argument.
exclude = np.setdiff1d(ts.edges_child[ts.edges_parent == 223239], [223230])

html = '''<p>XB is seemingly not a recombinant in the sc2ts ARG. Here we display only a few of the 192 XB samples</p>'''

arg.plot_pango_subgraph(
    rec.pango("XB"),
    txt(html, top="10em", right="24em"),
    parent_pangos=["B.1.243"],
    exclude=exclude,
    restrict_to_first=30,
    child_levels=0,
)

In [None]:
# XC subgraph, going 5 levels up towards the parent lineages
arg.plot_pango_subgraph(
    rec.pango("XC"),
    parent_pangos=["AY.29", "B.1.1.7"],
    parent_levels=5,
    oldest_y_label="2020-05",
    y_axis_scale="rank",
)

In [None]:
# XE and XH are plotted together; the copying table for node 965353 is shown underneath
html = '''<p>Some repeat sequences involving deletions just on the RHS of the breakpoint (see copying table below).
    Could these be misaligned?</p>
    <p>The 2 recombination nodes to the bottom right may be spurious.
    Possible alignment problems with the deletion here?</p>''' +  issue(337)
with warnings.catch_warnings():
    # Silence any warnings raised while plotting
    warnings.simplefilter("ignore")
    arg.plot_pango_subgraph(
        rec.pango(["XE", "XH"]),
        txt(html, top="10em", right="23em", width="32em"),
        parent_pangos=["BA.1.17.2", "BA.2"],
        parent_levels=5,
        child_levels=0,
        include=[1212052, 1177107],
        restrict_to_first=20,
        y_axis_scale="rank",
        height=700,  # make room for the copying pattern
    )
# Last expression of the cell, so the styled copying table is rendered as its output
HTML(
    "<style>table.copying-table {font-size: 8px; @media print {zoom: 0.6}} table.copying-table .pattern td {font-size: 0.5em; width:0.3em}</style>"
    + sc2ts.info.CopyingTable(ts, 965353).html(show_bases=None)
)

In [None]:
# XF subgraph with a short annotation
arg.plot_pango_subgraph(
    rec.pango("XF"),
    txt("Looks a clean recombinant", right="30em"),
    y_axis_scale="rank",
    parent_pangos=["BA.1", "AY.4"],
)

In [None]:
# XG subgraph with a short annotation
html = "<p>Fairly clean</p>"

arg.plot_pango_subgraph(
    rec.pango("XG"),
    txt(html, right="25em"),
    y_axis_scale="rank",
    parent_pangos=["BA.1.17", "BA.2.9"],
)

In [None]:
# XJ subgraph, with node 1090786 explicitly included
arg.plot_pango_subgraph(
    rec.pango("XJ"),
    parent_pangos=["BA.1.17.2", "BA.2"],
    include=[1090786],
    y_axis_scale="rank",
)

In [None]:
# XL subgraph
arg.plot_pango_subgraph(
    rec.pango("XL"),
    parent_pangos=["BA.1.17.2", "BA.2"],
    y_axis_scale="rank",
)

In [None]:
# XM and XAL are plotted together, each highlighted in its own colour;
# the copying table for node 1003220 is shown underneath.
html = "Multiple origins of XM, but one dominant one (node 1003220 at position 𝟐𝟏𝟓𝟗𝟓), which also contains XAL"

pangos = rec.pango(["XM", "XAL"])
colours = ['#332288', '#88CCEE', '#44AA99', '#117733', '#999933', '#DDCC77']  # from https://personal.sron.nl/~pault/
arg.plot_pango_subgraph(
    pangos,
    txt(html, top="20.5em", right="30em", width="25em"),
    parent_pangos=["BA.1.1", "BA.2"],
    parent_levels=7,
    child_levels=0,
    # one colour per lineage, in the order given in `pangos`
    highlight_nodes=dict(zip(colours, (pango_lineage_samples[pX] for pX in pangos))),
    height=700,
    y_axis_scale="rank",
)

HTML(
    "<style>table.copying-table {font-size: 8px; @media print {zoom: 0.6}} table.copying-table .pattern td {font-size: 0.5em; width:0.3em}</style>"
    + sc2ts.info.CopyingTable(ts, 1003220).html(child_label="1003220", show_bases=None)
)

In [None]:
# XN and its non-recombinant sister lineage XAU, plotted together.
html = (
    "Clearly non-recombinant in sc2ts: the most basal XN is only one mutation different from an inferred BA.2 node. " +
    "The XAU lineage is a non-recombinant sister to XN, so also shown here " +
    issue(358)
)

# Use a cell-specific name: `pangoX` is the list of main X lineages that the
# analysis cells iterate over, so it must not be overwritten here.
xn_pangos = rec.pango(["XN", "XAU"])
# Colours are assigned in the order XAU, XN
cmap = {c: pango_lineage_samples[pX] for c, pX in zip(colours, ["XAU", "XN"])}

arg.plot_pango_subgraph(
    xn_pangos,
    txt(html),
    include=[1230542, 1235915, 1250403],
    parent_levels=7, child_levels=0,
    parent_pangos=["BA.2"],
    highlight_nodes=cmap,
    y_axis_scale="rank",
)

In [None]:

# XP subgraph; warnings raised while plotting are silenced
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    arg.plot_pango_subgraph(
        rec.pango("XP"),
        txt(
            "This is a known case where the PangoX recombinant is defined on the basis of a deletion "
            "which is not common enough to map in sc2ts (and hence not shown here). If this deletion "
            "is potentially recurrent, evidence from sc2ts indicates XP may not be a recombinant, "
            "as it is only one mutation different from BA.2 sample ERR8628084, and that mutation is "
            "at position 29510, at the end of the genome and hence error-prone. "
            + issue(345),
            top="25em",
            right="35em",
        ),
        parent_pangos=["BA.1.1"],
        parent_levels=12,
        y_axis_scale="rank",
    )

In [None]:
# XQ, XR, XU, XAA, XAG and XAM plotted together, each highlighted in its own colour;
# the copying table for node 1058654 is shown underneath.
# Use a cell-specific name: `pangoX` is the list of main X lineages that the
# analysis cells iterate over, so it must not be overwritten here.
xq_pangos = rec.pango(["XQ", "XR", "XU", "XAA", "XAG", "XAM"])
# NOTE(review): the original text ran two sentences together ("...at the time of matching,
# or the .are a maximum of 10 samples..."). The fragment after "or the" could not be
# recovered, so the sentence now ends at "matching" - please confirm the wording.
html = (
    "The RH parent is clearly misidentified, and should be 1028784 (higher up the BA.2 branch), which would prevent 3 reversions. Presumably this inferred node was not available at the time of matching. There are a maximum of 10 samples from each group displayed, plus some extra BA.2 samples that appear nested. " +
    issue(338)
)
extras1 = [1216524, 1240312, 1105611, 2534291, 2534290, 1202063, 1158324, 1080162]
extras2 = [1094287, 1058654]
#extras3 = [1126313, 2534274, 2534275, 1141965, 2508149, 1105611, 1142202, 1111753]

colours = ['#332288', '#88CCEE', '#44AA99', '#117733', '#999933', '#DDCC77']  # from https://personal.sron.nl/~pault/

arg.plot_pango_subgraph(
    xq_pangos,
    txt(html, right="14em", width="30em", top="10em"),
    include=extras1, # + extras2, # + extras3 + [1200258, 1158324],
    height=700,
    restrict_to_first=10,
    parent_levels=10,
    child_levels=0,
    parent_pangos=["BA.1.1.15", "BA.2.9"],
    # one colour per lineage, in the order given in `xq_pangos`
    highlight_nodes={c: pango_lineage_samples[pX] for c, pX in zip(colours, xq_pangos)},
    y_axis_scale="rank",
    oldest_y_label="2021-08",
)

HTML(
    "<style>table.copying-table {font-size: 7px; @media print {zoom: 0.4}} table.copying-table .pattern td {font-size: 0.4em; width:0.3em}</style>" +
    sc2ts.info.CopyingTable(ts, 1058654).html(child_label="XQ", show_bases=None)
)

In [None]:
# XS subgraph; warnings raised while plotting are silenced
html = (
    "Although there are two recombination nodes adjacent to each other. "
    "The second node is likely to be an artifact  "
    + issue(287)
)
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    arg.plot_pango_subgraph(
        rec.pango("XS"),
        txt(html, top="9em"),
        parent_pangos=["AY.103", "BA.1.1"],
        parent_levels=6,
        oldest_y_label="2021-06",
        y_axis_scale="rank",
        height=600,
    )

In [None]:
# XW subgraph
arg.plot_pango_subgraph(
    rec.pango("XW"),
    parent_pangos=["BA.1.1.15", "BA.2"],
    parent_levels=6,
    y_axis_scale="rank",
    oldest_y_label="2021-10",
)

In [None]:
# XY subgraph
arg.plot_pango_subgraph(
    rec.pango("XY"),
    parent_pangos=["BA.1.1", "BA.2"],
    parent_levels=6,
    y_axis_scale="rank",
    oldest_y_label="2021-10",
)

In [None]:
# XZ, XAC, XAD, XAE and XAP plotted together, each highlighted in its own colour;
# the copying table for node 964555 is shown underneath.
colours = ['#332288', '#88CCEE', '#44AA99', '#999933', '#DDCC77']  # from https://personal.sron.nl/~pault/
# Use a cell-specific name: `pangoX` is the list of main X lineages that the
# analysis cells iterate over, so it must not be overwritten here.
xz_pangos = rec.pango(["XZ", "XAC", "XAD", "XAE", "XAP"])
html = "<p>A good example of a nested set of Pango X designations. However, the two XAD samples are probably wrongly separated here: it would be more parsimonious to group them which could remove 4 recurrent mutations.</p>" + issue(339)

cmap = {c: pango_lineage_samples[pX] for c, pX in zip(colours, xz_pangos)}

# Extra context samples, looked up by sample ID (the index of `df`).
# NOTE(review): "SRR19689888" was listed twice; the repeat is dropped so the node is
# not passed in twice. If a fourth, different sample was intended, add it here.
keep_ids = list(df.loc[["SRR19689888", "ERR8146303", "ERR8163061"], 'node_id'])

cmap.update({'lightgrey': keep_ids})

arg.plot_pango_subgraph(
    xz_pangos,
    txt(html, right="35em"),
    include=keep_ids,
    parent_levels=4,
    highlight_nodes=cmap,
    height=700,
    oldest_y_label="2021-10",
)

HTML(
    "<style>table.copying-table {font-size: 8px; @media print {zoom: 0.6}} table.copying-table .pattern td {font-size: 0.5em; width:0.3em}</style>" +
    sc2ts.info.CopyingTable(ts, 964555).html(show_bases=None)
)

In [None]:
# XAF subgraph (node 1177107 explicitly included), followed by that node's copying table
html = "This looks complicated because it is entagled with the two upper RE nodes which are those created by the XE/XH recombinant. " + issue(360)
arg.plot_pango_subgraph(
    rec.pango("XAF"),
    txt(html),
    parent_pangos=["BA.2", "BA.1"],
    parent_levels=5,
    child_levels=10,
    include=[1177107],
    y_axis_scale="rank",
    height=700,
)

HTML(
    "<style>table.copying-table {font-size: 8px; @media print {zoom: 0.6}} table.copying-table .pattern td {font-size: 0.5em; width:0.3em}</style>"
    + sc2ts.info.CopyingTable(ts, 1177107).html(show_bases=None)
)

In [None]:
# XAJ subgraph; warnings raised while plotting are silenced
html = issue(352)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    arg.plot_pango_subgraph(
        rec.pango("XAJ"),
        txt("Not a recombinant. " + html, top="20em", right="30em"),
        parent_pangos=["BA.2.12"],
        parent_levels=9,
        oldest_y_label="2021-09",
        y_axis_scale="rank",
    )

In [None]:
# XAN, with XAV included for context; each lineage is highlighted in its own colour.
# Use a cell-specific name: `pangoX` is the list of main X lineages that the
# analysis cells iterate over, so it must not be overwritten here.
xan_pangos = rec.pango(["XAN", "XAV"])
# Build the colour map from the lineages plotted in THIS cell. (It was previously built
# from `pangos`, a leftover from the XM/XAL cell, so the wrong samples were highlighted.)
cmap = {c: pango_lineage_samples[pX] for c, pX in zip(colours, xan_pangos)}

arg.plot_pango_subgraph(
    xan_pangos,
    txt("Here, XAV is also included for context, as it shares some mutations with XAN. " + issue(353)),
    parent_levels=5,
    parent_pangos=["BA.5.1", "BA.5.1.24"],
    highlight_nodes=cmap,
    oldest_y_label="2022-02",
)

In [None]:
# XAS subgraph; warnings raised while plotting are silenced.
# A space is added before the issue link so it does not run into the preceding sentence.
html = (
    "The proposed evidence to designate XAS is a single (likely functionally important) protein-truncating mutation in some BA.2 samples. So, it clearly falls behind our threshold of detection. This case also highlights some subjectivity when weighing evidence for recombination. " + issue(340))

with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    arg.plot_pango_subgraph(
        rec.pango("XAS"),
        # The annotation was built but never passed to the plot; show it like the other cells do.
        # NOTE(review): default box position used - adjust right/top if it overlaps the graph.
        txt(html),
        parent_levels=5,
        parent_pangos=["BA.4"],
        oldest_y_label="2021-11",
        y_axis_scale="rank",
    )

In [None]:
# XAV subgraph, annotated with a link to the relevant GitHub issue
arg.plot_pango_subgraph(
    rec.pango("XAV"),
    txt(issue(354)),
    parent_pangos=["BA.2"],
    parent_levels=7,
    child_levels=1,
    oldest_y_label="2022-02",
)

In [None]:
# XAZ subgraph, limited to the first 20 samples; warnings raised while plotting are silenced
html = (
    "There are a lot of XAZ samples, but they all form a clade, so just pick the first 20 for viz. "
    + issue(356)
)
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    arg.plot_pango_subgraph(
        rec.pango("XAZ"),
        txt(html, top="30em", right="30em"),
        parent_pangos=["BA.2"],
        parent_levels=4,
        child_levels=0,
        restrict_to_first=20,
        oldest_y_label="2022-01",
    )

In [None]:
# XBB subgraph, limited to the first 20 samples; warnings raised while plotting are silenced
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    arg.plot_pango_subgraph(
        rec.pango("XBB"),
        parent_pangos=["BA.2.10", "BM.1.1.1"],
        parent_levels=8,
        restrict_to_first=20,
        # alternative set of extra nodes, currently unused:
        #include=[1408964, 1396838, 1404568, 1423196, 1398292, 2681617, 1409763],
        include=[1429711, 1436032],
        y_axis_scale="rank",
        oldest_y_label="2021-11",
    )

In [None]:
# XBD subgraph
arg.plot_pango_subgraph(
    rec.pango("XBD"),
    parent_pangos=["BA.5.2"],
    parent_levels=7,
    oldest_y_label="2022-02",
    y_axis_scale="rank",
)

In [None]:
# XBE subgraph, annotated with a link to the relevant GitHub issue
arg.plot_pango_subgraph(
    rec.pango("XBE"),
    txt(issue(351), top="20em", right="5em"),
    parent_pangos=["BA.5.2"],
    parent_levels=7,
    oldest_y_label="2022-03",
)

In [None]:
# XBF subgraph
arg.plot_pango_subgraph(
    rec.pango("XBF"),
    parent_pangos=["BA.5.2.1", "CJ.1"],
    oldest_y_label="2022-03",
    parent_levels=7,
)

In [None]:
# XBG subgraph
arg.plot_pango_subgraph(
    rec.pango("XBG"),
    parent_pangos=["BA.2.76", "BA.5.2"],
    oldest_y_label="2022-03",
    parent_levels=7,
)

In [None]:
# XBH subgraph, with node 1379419 explicitly included
arg.plot_pango_subgraph(
    rec.pango("XBH"),
    parent_pangos=["BA.2.1", "BA.2.75.2"],
    parent_levels=4,
    child_levels=3,
    include=[1379419],
    oldest_y_label="2021-09",
    y_axis_scale="rank",
    height=700,
)

In [None]:
# XBK, XBK.1, XBQ and CJ.1.3 plotted together, each highlighted in its own colour;
# warnings raised while plotting are silenced.
colours = ['#332288', '#88CCEE', '#44AA99', '#999933', '#DDCC77']  # from https://personal.sron.nl/~pault/
# Use a cell-specific name: `pangoX` is the list of main X lineages that the
# analysis cells iterate over, so it must not be overwritten here.
xbk_pangos = rec.pango(["XBK", "XBK.1", "XBQ", "CJ.1.3"])
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    arg.plot_pango_subgraph(
        xbk_pangos,
        txt(issue(349), right="25em"),
        include = [1363939, 1342796],
        parent_levels=10,
        parent_pangos=["BM.1.1.1", "BM.1.1", "BA.2", "BA.2.75", "BA.2.75.3"],
        highlight_nodes={c: pango_lineage_samples[pX] for c, pX in zip(colours, xbk_pangos)},
        oldest_y_label="2021-10",
    )

In [None]:
# XBM subgraph. Two extra samples are included, looked up by sample ID (the index of `df`).
keep_ids = list(df.loc[["SRR21672613", "ERR10770184"], 'node_id'])

arg.plot_pango_subgraph(
    rec.pango("XBM"),
    parent_pangos=["BA.2.76", "BF.3"],
    parent_levels=6,
    child_levels=3,
    include=keep_ids,
    oldest_y_label="2022-04",
    y_axis_scale="rank",
)

In [None]:
# XBR subgraph
arg.plot_pango_subgraph(
    rec.pango("XBR"),
    parent_pangos=["BA.5.2"],
    parent_levels=4,
    child_levels=0,
    oldest_y_label="2022-07",
    height=500,
)

In [None]:
# Check we have plotted all the not-None pangos in pango_counts.
# Name the lineages that were missed, so a failure says what still needs a plot.
unplotted = {k for k, v in pango_counts.items() if v is not None} - set(rec.pangos)
assert len(unplotted) == 0, f"Pango X lineages with no subgraph plotted: {sorted(unplotted)}"