# Plot mutations per Pango clade relative to Wuhan-Hu-1 versus clade designation date

In [None]:
# Notebook inputs. Both are placeholders here and must hold real paths before
# the later cells run (presumably injected by a notebook-parameterizing tool
# such as papermill -- confirm against how this notebook is invoked).
pango_consensus_seqs_json = None  # path to the JSON with Pango sequences for each clade (input)
chart_html = None  # path the output chart is saved to

In [None]:
import collections
import json

import altair as alt

import pandas as pd

# Turn off Altair's max-rows limit on chart data, since the chart below is
# built from a data frame with one row per clade.
# NOTE(review): the `_ =` presumably just keeps the call's return value from
# being echoed as cell output -- confirm.
_ = alt.data_transformers.disable_max_rows()

First, read all Pango clades and collect their amino-acid mutations (substitutions and deletions) relative to the reference:

In [None]:
# Load the per-clade Pango data. JSON is UTF-8 by specification, so decode it
# explicitly instead of relying on the platform's locale-dependent default.
with open(pango_consensus_seqs_json, encoding="utf-8") as f:
    pango_clades = json.load(f)

# Column-oriented accumulator (column name -> list of values), with one entry
# appended to every column per clade.
records = collections.defaultdict(list)

for clade, clade_d in pango_clades.items():
    records["clade"].append(clade)
    records["date"].append(clade_d["designationDate"])
    # Pool amino-acid substitutions and deletions into a single list for the
    # clade, dropping empty entries.
    records["muts_from_ref"].append(
        [
            mut
            for field in ("aaSubstitutions", "aaDeletions")
            for mut in clade_d[field]
            if mut
        ]
    )

def get_descendants(clade, desc_list):
    """Append `clade` and every clade beneath it to `desc_list`.

    The subtree is walked depth-first via the "children" entries of the
    module-level `pango_clades` mapping; each parent is recorded before its
    children, and children are visited in the order they are listed.

    Parameters
    ----------
    clade : str
        Name of the clade at the root of the subtree.
    desc_list : list
        Accumulator that is extended in place.

    Returns
    -------
    list
        The same `desc_list` object that was passed in.
    """
    pending = [clade]
    while pending:
        current = pending.pop()
        desc_list.append(current)
        # Push children reversed so they are popped in their listed order.
        pending.extend(reversed(pango_clades[current]["children"]))
    return desc_list

# XBB together with every clade in its subtree.
xbb_clades = get_descendants("XBB", [])

# Keep only clades that have a designation date.
designated_df = pd.DataFrame(records).query("date != ''")

# For each clade, the subset of its mutations whose gene prefix (the text
# before the first ":") is "S".
spike_mut_lists = designated_df["muts_from_ref"].apply(
    lambda muts: [mut for mut in muts if mut.partition(":")[0] == "S"]
)

pango_df = designated_df.assign(
    spike_muts=spike_mut_lists,
    n_spike_muts=spike_mut_lists.map(len),
    is_xbb_descendant=designated_df["clade"].isin(xbb_clades),
    # Parse the date strings into datetimes, replacing the original column.
    date=pd.to_datetime(designated_df["date"]),
)

pango_df

In [None]:
# Encoding channels, defined up front so the chart expression stays short.
x_designation_date = alt.X(
    "date",
    title="clade designation date",
    scale=alt.Scale(nice=True),
    axis=alt.Axis(tickCount="year", labelOverlap=True),
)
y_spike_count = alt.Y(
    "n_spike_muts",
    title="spike mutations from Wu-Hu-1",
)
# Two-color (gray / orange) range over the XBB-descendant flag; the legend
# symbols are drawn fully opaque even though the points are translucent.
color_by_xbb = alt.Color(
    "is_xbb_descendant",
    title="XBB clade",
    scale=alt.Scale(range=["gray", "orange"]),
    legend=alt.Legend(symbolOpacity=1, orient="top", titleOrient="left"),
)

# Scatter plot with one translucent circle per clade.
chart = (
    alt.Chart(pango_df)
    .mark_circle(opacity=0.25)
    .encode(
        x_designation_date,
        y_spike_count,
        color_by_xbb,
        tooltip=["clade", "date", "n_spike_muts", "is_xbb_descendant"],
    )
    .properties(width=160, height=150)
    .configure_axis(grid=False)
)

print(f"Saving to {chart_html}")
chart.save(chart_html)

chart