# Merge viral and mitochondrial counts

In [None]:
import pandas as pd

# Plain (non-table) parameters forwarded by the Snakemake rule.
# `virus_names` is used below both for membership checks and as the mapping
# passed to `Series.map`; `metagenomic_descriptions` is the collection of
# sample descriptions to keep.
virus_names = snakemake.params.virus_names
metagenomic_descriptions = snakemake.params.metagenomic_descriptions

# Tables, each read from a CSV path supplied by the rule.
# NOTE(review): the mito and sample-metadata paths arrive via `params`
# rather than `input` -- confirm that is intentional.
sample_metadata = pd.read_csv(snakemake.params.sample_metadata)
mito = pd.read_csv(snakemake.params.mito)
viral = pd.read_csv(snakemake.input.viral)

In [None]:
# Every virus ID in the counts table needs an entry in `virus_names`;
# otherwise the `virus_name` lookup below would leave gaps. Raise explicitly
# (rather than `assert`, which is skipped under `python -O`) and report the
# offending IDs so the failure is actionable.
unknown_virus_ids = set(viral["virus_id"]) - set(virus_names)
if unknown_virus_ids:
    raise ValueError(
        "virus_id values missing from virus_names: "
        f"{sorted(unknown_virus_ids, key=str)}"
    )

df = (
    viral
    # Attach the sample name to each run accession. The default inner join
    # keeps only accessions whose sample description is one of
    # `metagenomic_descriptions`.
    .merge(
        (
            sample_metadata
            .rename(columns={"Run accession": "accession", "Sample description": "description"})
            .query("description in @metagenomic_descriptions")
            [["accession", "Sample name"]]
        ),
        on="accession",
        validate="many_to_one",
    )
    # Sum read counts over all runs that belong to the same sample.
    .groupby(["virus_id", "Sample name"], as_index=False)
    .aggregate(virus_reads=pd.NamedAgg("n_reads", "sum"))
    .assign(virus_name=lambda x: x["virus_id"].map(virus_names))
    # Join the mitochondrial counts, restricted to the same descriptions.
    # NOTE(review): no `on=` is given, so pandas joins on every column name
    # the two frames share (presumably just "Sample name") and no `validate=`
    # guards the cardinality -- confirm and consider making both explicit.
    .merge(
        mito
        .rename(
            columns={
                "reference_id": "species_id",
                "common_name": "species_name",
                "aligned_reads": "species_reads",
            }
        )
        .query("description in @metagenomic_descriptions")
    )
    # Remove columns that are not wanted in the merged output table.
    .drop(columns=["total_reads", "SARS2_aligned_reads", "description"])
)

df.to_csv(snakemake.output.csv, index=False)

# Display the merged table as the notebook cell's output.
df