# Plot counts per lineage through time

## Imports

In [None]:
import altair as alt
import pandas as pd

## Configuration

In [None]:
# Lower bound for the dates that get plotted. It is compared as a string with a
# strict ">" when the records are filtered, so records dated exactly on this
# day are excluded.
min_date = "2018-01-01"

## Load metadata

In [None]:
def load_lineage_dates(metadata_file, lineage):
    """Load the unambiguous dates from a tab-separated metadata table.

    Parameters
    ----------
    metadata_file : str, path-like, or file-like
        Tab-separated table that must contain a "date" column.
    lineage : str
        Label written to the "lineage" column of every returned row.

    Returns
    -------
    pandas.DataFrame
        Columns "lineage" and "date" (dates kept as text), one row per record
        whose date is present and contains no "X" placeholder. The original
        row index of the input table is preserved.
    """
    # Only the date column is used, so skip parsing the rest of the table and
    # force text so the string test below works whatever the values look like.
    df = pd.read_csv(
        metadata_file,
        sep="\t",
        usecols=["date"],
        dtype={"date": str},
    )
    df["lineage"] = lineage

    # na=True flags missing dates as ambiguous so they are dropped; without it
    # the mask contains NaN and negating it with "~" raises a TypeError.
    # regex=False makes "X" a plain literal match.
    is_ambiguous = df["date"].str.contains("X", regex=False, na=True)

    return df.loc[~is_ambiguous, ["lineage", "date"]]

In [None]:
# Dates without "X" placeholders for H3N2, read from the 2022-05-31/ metadata directory.
h3n2_dates = load_lineage_dates("2022-05-31/metadata/metadata_h3n2_ha.tsv", "H3N2")

In [None]:
# Dates without "X" placeholders for H1N1pdm, read from the 2022-05-31/ metadata directory.
h1n1pdm_dates = load_lineage_dates("2022-05-31/metadata/metadata_h1n1pdm_ha.tsv", "H1N1pdm")

In [None]:
# Dates without "X" placeholders for Vic, read from the 2022-05-31/ metadata directory.
vic_dates = load_lineage_dates("2022-05-31/metadata/metadata_vic_ha.tsv", "Vic")

In [None]:
# Dates without "X" placeholders for Yam, read from the 2022-05-31/ metadata directory.
yam_dates = load_lineage_dates("2022-05-31/metadata/metadata_yam_ha.tsv", "Yam")

In [None]:
# Stack the four per-lineage tables into one long table. Each table keeps its
# own row index, so index values may repeat across lineages.
dates = pd.concat([h3n2_dates, h1n1pdm_dates, vic_dates, yam_dates])

In [None]:
# Preview the first rows of the combined table.
dates.head()

In [None]:
# Row and column counts of the combined table, before filtering by date.
dates.shape

In [None]:
# Keep only records dated strictly after min_date. The "date" column still
# holds text at this point, so this is a lexicographic string comparison.
# NOTE(review): presumably dates are zero-padded YYYY-MM-DD so string order
# matches chronological order — confirm against the metadata files.
# .copy() makes an independent frame so later column assignments do not write
# into a slice of `dates`.
recent_dates = dates[dates["date"] > min_date].copy()

In [None]:
# Convert the text dates to pandas datetimes so they can be resampled into time bins.
recent_dates["date"] = pd.to_datetime(recent_dates["date"])

In [None]:
# Row and column counts after filtering to recent dates.
recent_dates.shape

In [None]:
# Count samples per lineage in month-start ("1MS") bins. After set_index the
# only remaining column is "lineage", so its per-bin count is the number of
# samples; it is renamed to "samples" and the (lineage, date) index is turned
# back into ordinary columns.
# NOTE(review): this relies on pandas keeping the grouping column "lineage" in
# the resampled output; newer pandas releases may exclude it — confirm against
# the installed version.
binned_counts = (
    recent_dates
    .set_index("date")
    .groupby("lineage")
    .resample("1MS")
    .count()
    .rename(columns={"lineage": "samples"})
    .reset_index()
)

In [None]:
# Display the table of monthly counts per lineage.
binned_counts

In [None]:
# Line chart (with point markers) of the monthly sample counts, one colored
# series per lineage. Size is set first; the axis and legend font settings are
# applied last as chart-level configuration.
alt.Chart(
    binned_counts,
).mark_line(
    point=True,
).encode(
    x=alt.X("date:T", title="Date"),
    y=alt.Y("samples:Q", title="Number of samples"),
    # Fix the legend/color order instead of relying on the default sort.
    color=alt.Color(
        "lineage:N",
        sort=["H3N2", "H1N1pdm", "Vic", "Yam"],
        title="Lineage",
    ),
    tooltip=["lineage:N", "date:T", "samples:Q"],
).properties(
    width=900,
    height=400,
).configure_axis(
    labelFontSize=14,
    titleFontSize=14,
).configure_legend(
    labelFontSize=14,
    titleFontSize=14,
)