# Analyze counts of different canine coronaviruses

Get variables from `snakemake`:

In [None]:
# NOTE(review): `snakemake` is not defined anywhere in this notebook; it is
# presumably injected by Snakemake when it executes the notebook -- confirm.

# Input: CSV of read counts, loaded with `pd.read_csv` further down.
input_csv = snakemake.input.csv

# Output: path that the finished chart is saved to.
chart_html = snakemake.output.chart

# Parameters supplied by the workflow.
smk_params = snakemake.params
# CSV of per-sample metadata that is merged onto the read counts.
sample_metadata_csv = smk_params.sample_metadata
# Passed to `Series.map` to translate `virus_id` values into the `virus` column.
accessions = smk_params.accessions
# Values of the `description` column that are retained by the filter below.
metagenomic_descriptions = smk_params.metagenomic_descriptions

Import Python modules:

In [None]:
import altair as alt

import pandas as pd

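Read the read counts, merge in the sample metadata, keep only the samples whose description is in `metagenomic_descriptions`, and sum the reads for each virus, read-filtering setting, and collection date: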

In [None]:
# Per-sample metadata. Its "Run accession" column is renamed so that it lines
# up with the "accession" key used for the merge.
sample_metadata = pd.read_csv(sample_metadata_csv)
metadata_by_accession = sample_metadata.rename(
    columns={"Run accession": "accession"}
)

# Attach metadata to every read-count row. `validate="many_to_one"` makes
# pandas raise if an accession occurs more than once in the metadata.
counts_with_metadata = pd.read_csv(input_csv).merge(
    metadata_by_accession,
    on="accession",
    validate="many_to_one",
)

# Keep only the rows whose description is one of the configured values.
metagenomic_counts = counts_with_metadata.query(
    "description in @metagenomic_descriptions"
)

# Total the reads for each (virus, read filtering, collection date) combination,
# then add a `virus` column by mapping `virus_id` through `accessions`.
group_columns = ["virus_id", "read_filtering", "Collection date"]
df = (
    metagenomic_counts
    .groupby(group_columns, as_index=False)
    .agg({"n_reads": "sum"})
    .assign(virus=lambda summed: summed["virus_id"].map(accessions))
)

# Display the aggregated table as the cell output.
df

In [None]:
# Dropdown listing every collection date present in the aggregated data.
date_options = sorted(df["Collection date"].unique())
date_dropdown = alt.binding_select(
    name="collection date",
    options=date_options,
)

# Point selection on the collection date, driven by the dropdown and
# initialised to a fixed date.
collection_date = alt.selection_point(
    fields=["Collection date"],
    value="2020-01-12",
    bind=date_dropdown,
)

# Show every column of the table in the tooltip.
tooltip_columns = list(df.columns)

# Horizontal bars: one row per virus, with one offset bar per read-filtering
# setting, restricted to the currently selected collection date.
chart = (
    alt.Chart(df)
    .mark_bar(stroke="black")
    .add_params(collection_date)
    .transform_filter(collection_date)
    .encode(
        alt.X("n_reads", title="number of mapped reads"),
        alt.Y("virus"),
        alt.YOffset("read_filtering"),
        alt.Color("read_filtering", title="read filtering"),
        tooltip=tooltip_columns,
    )
    .properties(width=250, height=alt.Step(15))
)

print(f"Saving to {chart_html}")
chart.save(chart_html)

# Display the chart as the cell output.
chart