# Draw a static version of the mutations-versus-date scatter plot
This notebook uses a locally downloaded version of the [baltic](https://github.com/evogytis/baltic) module that has been manually edited to fix [this bug](https://github.com/evogytis/baltic/issues/38).

In [1]:
import altair as alt

import baltic

import pandas as pd

In [2]:
# Load the Auspice JSON tree with baltic. The translation tells baltic to take
# each node's "length" from the JSON "divergence" field; only the first element
# of what `loadJSON` returns (the tree) is kept.
tree_json = "../auspice/early-SARS2-trees_crits-christoph2024-Jan-31-2020-lineage-A-C29095T-colormuts.json"
json_translation = {"name": "name", "length": "divergence"}
tree = baltic.loadJSON(tree_json, json_translation=json_translation)[0]

# Get data frame with relevant node attributes.
# Keys are the trait names read from each leaf's `traits`; values are the
# human-readable column names used for the chart.
attributes = {
    "name": "name",
    "mutations_from_recCA": "mutations from ancestor (recCA)",
    "addtl_annotations": "sequence annotation",
    "date": "collection date",
}

# Display label for each raw `addtl_annotations` value.
annotation_labels = {
    "other": "other",
    "2020 market environment": "2020 Huanan Market environment",
    "2019 market sequence": "2019 Huanan Market case (WHO report)",
    "2019 non-market sequence": "2019 non-Huanan Market case (WHO report)",
}

# One record per leaf, with values in the same order as the keys of `attributes`.
records = []
for node in tree.getExternal():
    assert node.is_leaf()
    records.append([node.traits[attr] for attr in attributes])
leaf_df = pd.DataFrame(records, columns=list(attributes))

# `Series.map` turns values missing from the dict into NaN, and `groupby` drops
# NaN keys by default, so an unrecognized annotation would silently remove
# sequences from the plot and from the counts. Fail loudly instead.
unmapped_annotations = set(leaf_df["addtl_annotations"]) - set(annotation_labels)
assert not unmapped_annotations, f"unrecognized sequence annotations: {unmapped_annotations}"

# Parse dates, relabel annotations, switch to display column names, then count
# how many sequences share each (mutations, annotation, date) combination.
df = (
    leaf_df
    .assign(
        date=lambda x: pd.to_datetime(x["date"]),
        addtl_annotations=lambda x: x["addtl_annotations"].map(annotation_labels),
    )
    .rename(columns=attributes)
    .groupby([attr for attr in attributes.values() if attr != "name"], as_index=False)
    .aggregate(**{"number of sequences": pd.NamedAgg("name", "count")})
)

# make chart
# Custom hexagon marker given as a path string; used with the named point shapes below.
hexagon = "M0.866,-0.5 L0.866,0.5 L0,1 L-0.866,0.5 L-0.866,-0.5 L0,-1 Z"

# x: collection date, with vertical tick labels formatted like Dec-29-2020
date_axis = alt.Axis(
    format='%b-%d-%Y',
    labelAngle=-90,
    tickCount=12,
)
x_channel = alt.X(
    "collection date",
    scale=alt.Scale(nice=False, padding=9),
    axis=date_axis,
)

# y: mutation count relative to the ancestor; the axis is not forced to include zero
y_channel = alt.Y(
    "mutations from ancestor (recCA)",
    scale=alt.Scale(zero=False, nice=False, padding=9),
)

# the sequence annotation drives both the color and the marker shape
color_channel = alt.Color("sequence annotation")
shape_channel = alt.Shape(
    "sequence annotation",
    scale=alt.Scale(range=["square", "diamond", hexagon, "circle"]),
)

# marker area reflects how many sequences share the same point
size_channel = alt.Size(
    "number of sequences",
    scale=alt.Scale(nice=False, zero=False, range=[50, 250]),
    legend=alt.Legend(values=[1, 5, 10, 20]),
)

chart = (
    alt.Chart(df)
    .mark_point(filled=True, opacity=0.7, stroke="black", strokeWidth=0.75)
    .encode(x_channel, y_channel, color_channel, shape_channel, size_channel)
    .properties(width=300, height=180)
    .configure_axis(grid=False)
    .configure_legend(labelLimit=500)
)

# write the static figure, then display the chart as the cell output
chart.save("scatter.pdf")

chart


Tree height: 0.001510
Tree length: 0.077890

Numbers of objects in tree: 565 (90 nodes and 475 leaves)

