# Make plots

Import Python modules:

In [None]:
import itertools
import os
import warnings

import altair as alt

import numpy

import pandas as pd

import scipy.stats

# Turn off Altair's limit on the number of rows in a chart's data set.
# The return value is bound to `_`, presumably so the notebook cell does not echo it.
_ = alt.data_transformers.disable_max_rows()

Get data and variables from `snakemake`:

In [None]:
# All inputs, parameters, and output paths come from the `snakemake` object,
# which is expected to be present in the notebook namespace.
metagenomic_descriptions = snakemake.params.metagenomic_descriptions


def _fix_isolation_source(df):
    """Return the "Isolation source" column with the misspelling of "wing" corrected.

    The same literal substitution ("ine " -> "ing ") is applied to every data frame
    so that isolation-source labels agree between the SARS2 and mitochondrial tables.
    """
    return df["Isolation source"].str.replace("ine ", "ing ", regex=False)


sars2_aligned_by_run = pd.read_csv(snakemake.input.sars2_aligned_by_run)

sars2_aligned_by_sample = (
    pd.read_csv(snakemake.input.sars2_aligned_by_sample)
    .rename(
        columns={"percent_preprocessed_reads_aligning_to_SARS2": "% reads aligning to SARS2"},
    )
    # correct spelling of "wing" in original metadata
    .assign(**{"Isolation source": _fix_isolation_source})
    # only keep metagenomic samples
    .query("description in @metagenomic_descriptions")
)

mito_composition_by_run = pd.read_csv(snakemake.input.mito_composition_by_run)

mito_composition_by_sample = (
    pd.read_csv(snakemake.input.mito_composition_by_sample)
    # correct spelling of "wing" exactly as done for the SARS2 alignment table
    .assign(**{"Isolation source": _fix_isolation_source})
    # only keep metagenomic samples
    .query("description in @metagenomic_descriptions")
)

crits_christoph_read_counts = pd.read_csv(snakemake.input.crits_christoph_read_counts)

ngdc_to_crits_christoph = pd.read_csv(snakemake.input.ngdc_to_crits_christoph)

# mapping-like object of output plot paths, indexed by plot name in later cells
output_plots = snakemake.output

## Compare mitochondrial DNA composition to Crits-Christoph et al
First get Crits-Christoph read counts in tidy format, assigning NGDC run accessions and summing counts for runs:

In [None]:
# Lookup from Crits-Christoph FASTQ name (text before the first ".") to run accession.
_filename_to_run = (
    ngdc_to_crits_christoph
    .assign(Filename=lambda x: x["fastq Crits-Christoph"].map(lambda s: s.partition(".")[0]))
    .rename(columns={"Run accession NGDC": "Run accession"})
    [["Filename", "Run accession"]]
    .drop_duplicates()
)

# Lookup from run accession to sample.
_run_to_sample = sars2_aligned_by_run[["Run accession", "sample"]].drop_duplicates()

# Attach run accessions (one row per file), then samples (several runs may share a sample).
_counts_with_run = pd.merge(
    crits_christoph_read_counts.drop(columns=["Location", "Sample_name"]),
    _filename_to_run,
    validate="one_to_one",
)
_counts_with_sample = pd.merge(
    _counts_with_run,
    _run_to_sample,
    how="left",
    validate="many_to_one",
)

# Go from one column per species to one row per species, then sum counts within each run.
crits_christoph_read_counts_tidy = (
    _counts_with_sample
    .melt(
        id_vars=["sample", "Run accession", "Filename"],
        var_name="species",
        value_name="aligned reads Crits-Christoph",
    )
    .groupby(["sample", "Run accession", "species"], as_index=False)
    .agg({"aligned reads Crits-Christoph": "sum"})
)

crits_christoph_read_counts_tidy

Now merge current read counts with those from Crits-Christoph for all species:

In [None]:
crits_christoph_species = crits_christoph_read_counts_tidy["species"].unique().tolist()

# Columns of the current study's per-run counts, relabeled to mark their origin.
_current_study_names = {
    "aligned_reads": "aligned reads current study",
    "covered_bases": "covered bases current study",
}
_columns_to_compare = ["Run accession", "species", "common_name", *_current_study_names]

mito_composition_to_compare = (
    mito_composition_by_run[_columns_to_compare].rename(columns=_current_study_names)
)

# every Crits-Christoph species must also be present in the current study's table
assert set(crits_christoph_species) <= set(mito_composition_to_compare["species"])

# one row per (run, species) holding both studies' counts
crits_christoph_vs_current = pd.merge(
    crits_christoph_read_counts_tidy,
    mito_composition_to_compare,
    on=["Run accession", "species"],
    validate="one_to_one",
)

Now get the correlations by species and plot them:

In [None]:
# Total reads per species in each study, merged onto the correlations below.
_species_read_totals = (
    crits_christoph_vs_current
    .groupby("species", as_index=False)
    .agg(
        aligned_reads_Crits_Christoph=("aligned reads Crits-Christoph", "sum"),
        aligned_reads_current_study=("aligned reads current study", "sum"),
    )
)

# Per-species 2x2 Pearson correlation matrices; only the off-diagonal entry is kept.
_species_corr_matrices = (
    crits_christoph_vs_current
    .groupby(["species", "common_name"])
    [["aligned reads Crits-Christoph", "aligned reads current study"]]
    .corr(method="pearson")
    .reset_index()
)

crits_christoph_vs_current_species_corr = (
    _species_corr_matrices
    .loc[lambda d: d["level_2"] == "aligned reads Crits-Christoph"]
    .rename(columns={"aligned reads current study": "correlation"})
    .drop(columns=["level_2", "aligned reads Crits-Christoph"])
    # drop species for which no correlation could be computed
    .loc[lambda d: d["correlation"].notnull()]
    .merge(_species_read_totals)
    .sort_values("correlation")
)

crits_christoph_vs_current_species_corr

In [None]:
# Tooltip for every column; float columns get a 3-significant-digit format.
_species_corr_tooltips = [
    alt.Tooltip(c, format=".3g")
    if crits_christoph_vs_current_species_corr[c].dtype == float
    else c
    for c in crits_christoph_vs_current_species_corr.columns.tolist()
]

# Species on the y-axis, ordered by decreasing correlation.
_species_axis = alt.Y(
    "common_name",
    sort=alt.SortField("correlation", order="descending"),
    title=None,
)

crits_christoph_vs_current_species_corr_chart = (
    alt.Chart(crits_christoph_vs_current_species_corr)
    .mark_circle(size=50, opacity=1)
    .encode(x="correlation", y=_species_axis, tooltip=_species_corr_tooltips)
    .properties(height=alt.Step(13), width=185)
)

crits_christoph_vs_current_species_corr_chart

Now get the correlations by run:

In [None]:
# Total reads per run in each study, merged onto the correlations below.
_run_read_totals = (
    crits_christoph_vs_current
    .groupby("Run accession", as_index=False)
    .agg(
        aligned_reads_Crits_Christoph=("aligned reads Crits-Christoph", "sum"),
        aligned_reads_current_study=("aligned reads current study", "sum"),
    )
)

# Per-run 2x2 Pearson correlation matrices across species; only the off-diagonal entry is kept.
_run_corr_matrices = (
    crits_christoph_vs_current
    .groupby(["Run accession", "sample"])
    [["aligned reads Crits-Christoph", "aligned reads current study"]]
    .corr(method="pearson")
    .reset_index()
)

crits_christoph_vs_current_run_corr = (
    _run_corr_matrices
    .loc[lambda d: d["level_2"] == "aligned reads Crits-Christoph"]
    .rename(columns={"aligned reads current study": "correlation"})
    .drop(columns=["level_2", "aligned reads Crits-Christoph"])
    # drop runs for which no correlation could be computed
    .loc[lambda d: d["correlation"].notnull()]
    .merge(_run_read_totals)
)

crits_christoph_vs_current_run_corr.sort_values("correlation")

Plot the correlations by run:

In [None]:
def _min_reads_slider(label):
    """Return a 0-1000 range-slider parameter labeled *label*, starting at 50."""
    return alt.param(
        value=50,
        bind=alt.binding_range(name=label, min=0, max=1000),
    )


aligned_reads_current_slider = _min_reads_slider("minimum aligned reads in current study")

aligned_reads_crits_christoph_slider = _min_reads_slider(
    "minimum aligned reads in Crits-Christoph et al"
)

# Tooltip for every column; float columns get a 4-significant-digit format.
_run_corr_tooltips = [
    alt.Tooltip(c, format=".4g")
    if crits_christoph_vs_current_run_corr[c].dtype == float
    else c
    for c in crits_christoph_vs_current_run_corr.columns.tolist()
]

_run_corr_title = (
    "Correlation in read counts aligned to mitochondrial genomes of different "
    "species in Crits-Christoph et al and current study"
)

crits_christoph_vs_current_run_corr_chart = (
    alt.Chart(crits_christoph_vs_current_run_corr)
    .mark_circle(size=50, opacity=1)
    .encode(
        # runs ordered by decreasing correlation
        x=alt.X(
            "Run accession",
            sort=alt.SortField("correlation", order="descending"),
        ),
        y=alt.Y("correlation", title="Pearson correlation"),
        tooltip=_run_corr_tooltips,
    )
    .properties(width=alt.Step(11), height=150, title=_run_corr_title)
    .add_params(aligned_reads_current_slider, aligned_reads_crits_christoph_slider)
    # only show runs whose total aligned reads reach both slider thresholds
    .transform_filter(
        alt.datum["aligned_reads_current_study"] >= aligned_reads_current_slider
    )
    .transform_filter(
        alt.datum["aligned_reads_Crits_Christoph"] >= aligned_reads_crits_christoph_slider
    )
)

crits_christoph_vs_current_run_corr_chart.save(
    output_plots["crits_christoph_vs_current_run_corr"]
)

crits_christoph_vs_current_run_corr_chart

## Plot mitochondrial composition for samples
First, for each sample we get just the species that are sufficiently abundant in either the full species set or the Crits-Christoph species set, and call other species "other":

In [None]:
# every Crits-Christoph species must also be present in the per-sample table
assert set(crits_christoph_species).issubset(mito_composition_by_sample["species"])

group_as_other_cutoff = 5  # group as "other" if < this %
max_species_per_sample = 10  # group as other lower percents so we never get more than this many

# require at least this many reads aligned to mitochondrial genomes
min_mito_reads = 100

# Per-sample composition, computed both over all species and over just the
# Crits-Christoph species, with low-abundance species collapsed into "other".
composition_df = (
    mito_composition_by_sample
    .assign(
        cc_species=lambda x: x["species"].isin(crits_christoph_species),
        total_mito=lambda x: x.groupby("sample")["aligned_reads"].transform("sum"),
        # reads count toward the Crits-Christoph set only for species in that set
        aligned_reads_cc=lambda x: x["aligned_reads"].where(x["cc_species"], 0),
        total_mito_cc=lambda x: x.groupby("sample")["aligned_reads_cc"].transform("sum"),
        percent=lambda x: 100 * x["aligned_reads"] / x["total_mito"],
        percent_cc=lambda x: 100 * x["aligned_reads_cc"] / x["total_mito_cc"],
        # rank 1 is the most abundant species in the sample; ties broken by row order
        rank=lambda x: x.groupby("sample")["aligned_reads"].transform("rank", method="first", ascending=False),
        # "other" if below the cutoff in both species sets, or ranked too low
        other=lambda x: (
            (x[["percent", "percent_cc"]].max(axis=1) < group_as_other_cutoff)
            | (x["rank"] >= max_species_per_sample)
        ),
        species=lambda x: x["species"].where(~x["other"], "other"),
        common_name=lambda x: x["common_name"].where(~x["other"], "other"),
    )
    # keep samples with at least `min_mito_reads` mitochondrial reads (inclusive,
    # as the comment on `min_mito_reads` states)
    .query("total_mito >= @min_mito_reads")
    # sum the species that were relabeled as "other"
    .groupby(["sample", "species", "common_name"], as_index=False)
    [["aligned_reads", "aligned_reads_cc", "percent", "percent_cc"]]
    .aggregate("sum")
)

# Reshape to one row per (sample, species, species set) by melting the read counts
# and the percents separately and merging the two melted frames on their shared columns.
composition_df = (
    composition_df
    .rename(columns={"aligned_reads": "all", "aligned_reads_cc": "crits_christoph"})
    .melt(
        id_vars=["sample", "species", "common_name"],
        value_vars=["all", "crits_christoph"],
        value_name="aligned_reads",
        var_name="species_set",
    )
    .merge(
        composition_df
        .rename(columns={"percent": "all", "percent_cc": "crits_christoph"})
        .melt(
            id_vars=["sample", "species", "common_name"],
            value_vars=["all", "crits_christoph"],
            value_name="percent",
            var_name="species_set",
        )
    )
    # human-readable labels for the two species sets
    .assign(
        species_set=lambda x: x["species_set"].map(
            {
                "all": "chordates",
                "crits_christoph": "mammals analyzed in Crits-Christoph et al",
            }
        ),
    )
)

composition_df

Now make pie charts:

In [None]:
_sample_options = composition_df["sample"].unique()
_species_sets = composition_df["species_set"].unique()

# Dropdown choosing which sample's pie chart(s) to show.
sample_selection = alt.selection_point(
    fields=["sample"],
    value="Q61",
    bind=alt.binding_select(name="sample", options=_sample_options),
)

# Dropdown choosing a species set; the `None` option is labeled "all".
species_set_selection = alt.selection_point(
    fields=["species_set"],
    bind=alt.binding_select(
        name="species set(s) to show",
        options=[None, *_species_sets],
        labels=["all", *_species_sets],
    ),
)

# Tooltip for every column; float columns get a 3-significant-digit format.
_composition_tooltips = [
    alt.Tooltip(c, format=".3g") if composition_df[c].dtype == float else c
    for c in composition_df.columns
]

_species_color = alt.Color(
    "common_name",
    title="species (common name)",
    legend=alt.Legend(
        orient="right", columns=1, symbolSize=125, symbolType="square", labelLimit=0,
    ),
)

# One pie per species set, laid out as columns.
_species_set_column = alt.Column(
    "species_set",
    title=None,
    sort="descending",
    header=alt.Header(labelFontSize=11, labelFontWeight="bold"),
)

mito_composition_chart = (
    alt.Chart(composition_df)
    .mark_arc()
    .encode(
        theta="percent",
        color=_species_color,
        column=_species_set_column,
        tooltip=_composition_tooltips,
    )
    .add_params(sample_selection, species_set_selection)
    .transform_filter(sample_selection)
    .transform_filter(species_set_selection)
    .properties(height=175, width=175)
)

mito_composition_chart.save(output_plots["mito_composition"])

mito_composition_chart

## Distribution of amount of SARS-CoV-2 per sample
First add annotations of top species to number of SARS2 aligned reads per sample:

In [None]:
# For each sample, build a comma-separated string of its species (most abundant
# first) with rounded percents, e.g. "name (NN%), name (NN%)".
top_species_annotation = (
    composition_df
    # use the all-species composition; the label must match the "species_set"
    # value assigned when `composition_df` was built ("chordates")
    .query("species_set == 'chordates'")
    .assign(
        top_species=lambda x: x["common_name"] + " (" + x["percent"].round().astype(int).astype(str) + "%)"
    )
    # sort before grouping so the names are joined in order of decreasing percent
    .sort_values("percent", ascending=False)
    .groupby("sample", as_index=False)
    .aggregate({"top_species": lambda s: ", ".join(s)})
)

# left merge keeps samples that have no species annotation
sars2_aligned_df = sars2_aligned_by_sample.merge(top_species_annotation, how="left")

In [None]:
# Clicking legend entries selects collection dates.
date_selection = alt.selection_point(
    fields=["Collection date"],
    bind="legend",
    toggle="true",
)

_isolation_sources = sars2_aligned_df["Isolation source"].unique()

# Dropdown choosing an isolation source; the `None` option is labeled "all".
source_selection = alt.selection_point(
    fields=["Isolation source"],
    bind=alt.binding_select(
        name="sample isolation source",
        options=[None, *_isolation_sources],
        labels=["all", *_isolation_sources],
    ),
)

# Color points by collection date on a reversed viridis scale.
_collection_date_color = alt.Color(
    "Collection date:O",
    scale=alt.Scale(
        domain=sorted(sars2_aligned_df["Collection date"].unique()),
        scheme="viridis",
        reverse=True,
    ),
    legend=alt.Legend(orient="bottom", columns=7),
)

# Tooltip for every column; float and int columns get a compact 3-digit format.
_sars2_tooltips = [
    alt.Tooltip(c, format=".3~g")
    if sars2_aligned_df[c].dtype in [float, int]
    else c
    for c in sars2_aligned_df.columns
]

# Shared base: only samples with at least one preprocessed read.
sars2_aligned_chart_base = (
    alt.Chart(sars2_aligned_df.loc[lambda d: d["preprocessed_reads"] > 0])
    .mark_point(size=60, opacity=1, filled=True)
    .encode(color=_collection_date_color, tooltip=_sars2_tooltips)
    .add_params(date_selection, source_selection)
    .transform_filter(source_selection)
    .transform_filter(date_selection)
)

# Horizontal layout: samples along x, ordered by decreasing SARS2 percent.
sars2_aligned_chart = (
    sars2_aligned_chart_base
    .encode(
        x=alt.X(
            "sample",
            sort=alt.SortField("% reads aligning to SARS2", order="descending"),
        ),
        y=alt.Y(
            "% reads aligning to SARS2",
            title="% reads aligning to SARS2 (symlog scale)",
            scale=alt.Scale(type="symlog", constant=0.00001),
        ),
    )
    .properties(width=alt.Step(11), height=220)
)

sars2_aligned_chart.save(output_plots["sars2_aligned"])

sars2_aligned_chart

Make a vertical version of the same chart:

In [None]:
# Vertical layout of the SARS2 chart: samples along y, split across two column facets.
sars2_aligned_chart_vertical = (
    sars2_aligned_chart_base
    # some transforms to facet into two columns
    .transform_window(
        # number the rows in order of increasing SARS2 percent
        rank="row_number()",
        sort=[
            alt.SortField("% reads aligning to SARS2"),
        ],
    )
    .transform_joinaggregate(max_rank="max(rank)")
    .transform_calculate(
        # rows in the upper half of the ranking go to facet 1, the rest to facet 2
        facet=alt.expr.if_(alt.datum["rank"] / alt.datum["max_rank"] > 0.5, 1, 2),
    )
    .encode(
        y=alt.Y(
            "sample",
            title=None,
            sort=alt.SortField("rank", order="descending"),
        ),
        x=alt.X(
            "% reads aligning to SARS2",
            title="% reads aligning to SARS2 (symlog scale)",
            scale=alt.Scale(type="symlog", constant=0.00001),
        ),
        column=alt.Column("facet:N", title=None, header=alt.Header(labels=False)),
        # NOTE(review): this replaces the base chart's per-column tooltip with the
        # facet bookkeeping fields -- confirm that is intended.
        tooltip=["rank:Q", "max_rank:Q", "facet:O"],
    )
    .properties(height=alt.Step(9), width=200)
    .configure_legend(orient="right")
    # each facet gets its own y-axis so it lists only its own samples
    .resolve_scale(y="independent")
)

# save the vertical chart (not the horizontal `sars2_aligned_chart`) to the vertical output
sars2_aligned_chart_vertical.save(output_plots["sars2_aligned_vertical"])

sars2_aligned_chart_vertical

## Per species correlations with SARS2 reads
Make plots that correlate the percent of reads aligned to SARS2 with the percent aligned to each species.
To enable log scaled plotting, when a percent is zero we set it to half the lowest non-zero percent observed across all samples / species.

In [None]:
def _log10_with_floor(percents):
    """Return log10 of *percents*, first raising every value below half the
    smallest positive value up to that floor (so zeros can be log-transformed)."""
    floor = percents[percents > 0].min() / 2
    return numpy.log10(percents.clip(lower=floor))


# Linear-scale percents for each (sample, species) row.
_per_species_linear = (
    mito_composition_by_sample
    .drop(columns=["reference_id", "species"])
    .assign(
        percent_SARS2=lambda x: 100 * x["SARS2_aligned_reads"] / x["preprocessed_reads"],
        percent_species=lambda x: 100 * x["aligned_reads"] / x["preprocessed_reads"],
        total_mitochondrial_reads=lambda x: x.groupby("sample")["aligned_reads"].transform("sum"),
        percent_of_mitochondrial_reads=lambda x: x["aligned_reads"] / x["total_mitochondrial_reads"] * 100,
        axis_scale="linear scale with exact %",
    )
)

# The same rows with each percent column log10-transformed.
per_species_corr_df_log = _per_species_linear.assign(
    percent_SARS2=lambda x: _log10_with_floor(x["percent_SARS2"]),
    percent_species=lambda x: _log10_with_floor(x["percent_species"]),
    percent_of_mitochondrial_reads=lambda x: _log10_with_floor(x["percent_of_mitochondrial_reads"]),
    axis_scale="log10 of % (zero plotted as minimum non-zero value)",
)

# Stack both scales, switch to display column names, and add the top-species annotation.
_display_names = {
    "aligned_reads": "species_mitochondrial_reads",
    "percent_SARS2": "% SARS2 reads",
    "common_name": "species (common name)",
    "percent_species": "% of all preprocessed reads",
    "percent_of_mitochondrial_reads": "% of all mitochondrial reads",
}
per_species_corr_df = (
    pd.concat([_per_species_linear, per_species_corr_df_log], ignore_index=True)
    .rename(columns=_display_names)
    .merge(top_species_annotation, how="left")
)

# melt on how mitochondrial read percent computed
value_vars = ["% of all preprocessed reads", "% of all mitochondrial reads"]
_id_vars = [c for c in per_species_corr_df.columns if c not in value_vars]
per_species_corr_df = per_species_corr_df.melt(
    id_vars=_id_vars,
    value_vars=value_vars,
    value_name="% species mitochondria reads",
    var_name="% species mitochondria reads computed as",
)

per_species_corr_df

First make base chart:

In [None]:
_corr_isolation_sources = per_species_corr_df["Isolation source"].unique()
_axis_scale_options = per_species_corr_df["axis_scale"].unique()
_species_options = sorted(per_species_corr_df["species (common name)"].unique())
_mito_percent_options = per_species_corr_df["% species mitochondria reads computed as"].unique()

# Dropdown choosing an isolation source; the `None` option is labeled "all".
source_selection = alt.selection_point(
    fields=["Isolation source"],
    bind=alt.binding_select(
        name="sample isolation source",
        options=[None, *_corr_isolation_sources],
        labels=["all", *_corr_isolation_sources],
    ),
)

# Dropdown switching between the linear and log10 rows; starts on log10.
axis_scale_selection = alt.selection_point(
    fields=["axis_scale"],
    value="log10 of % (zero plotted as minimum non-zero value)",
    bind=alt.binding_select(name="axis scale", options=_axis_scale_options),
)

# Dropdown choosing a single species; starts on raccoon dog.
species_selection = alt.selection_point(
    fields=["species (common name)"],
    value="raccoon dog",
    bind=alt.binding_select(name="species (common name)", options=_species_options),
)

# Dropdown choosing which denominator the species percent uses.
mito_percent_selection = alt.selection_point(
    fields=["% species mitochondria reads computed as"],
    value="% of all preprocessed reads",
    bind=alt.binding_select(
        name="% species mitochondria reads computed as",
        options=_mito_percent_options,
    ),
)

min_mito_reads = 200

# Slider for the minimum total mitochondrial reads; starts at `min_mito_reads`.
mitochondrial_reads_slider = alt.param(
    value=min_mito_reads,
    bind=alt.binding_range(
        name="minimum total mitochondrial reads to show sample", min=0, max=1000,
    ),
)

# Base chart shared by the scatter plot and the correlation label: it attaches the
# interactive parameters and applies one filter per parameter. `date_selection`
# is the legend-bound selection defined earlier in the notebook.
per_species_corr_chart_base = (
    alt.Chart(per_species_corr_df)
    .add_params(
        date_selection,
        axis_scale_selection,
        mito_percent_selection,
        source_selection,
        mitochondrial_reads_slider,
    )
    .transform_filter(mito_percent_selection)
    .transform_filter(source_selection)
    .transform_filter(date_selection)
    # only keep samples whose total mitochondrial reads reach the slider value
    .transform_filter(alt.datum["total_mitochondrial_reads"] >= mitochondrial_reads_slider)
    .transform_filter(axis_scale_selection)
)
    
# Scatter of % SARS2 reads (y) against % species mitochondrial reads (x).
per_species_corr_chart_scatter_base = (
    per_species_corr_chart_base
    .encode(
        y=alt.Y("% SARS2 reads", scale=alt.Scale(zero=False)),
        x=alt.X("% species mitochondria reads", scale=alt.Scale(zero=False)),
        # the color range is "black" for every collection date, so all points are
        # drawn black while the chart still has a collection-date legend
        color=alt.Color(
            "Collection date:O",
            scale=alt.Scale(
                range=["black"] * per_species_corr_df["Collection date"].nunique(),
                domain=sorted(per_species_corr_df["Collection date"].unique()),
            ),
            legend=alt.Legend(orient="bottom", columns=7),
        ),
        # tooltip for every column except the three bookkeeping columns listed below;
        # float and int columns get a compact 3-digit format
        tooltip=[
            alt.Tooltip(c, format=".3~g")
            if per_species_corr_df[c].dtype in [float, int]
            else c
            for c in per_species_corr_df.columns
            if c not in {
                "axis_scale",
                "total_mitochondrial_reads",
                "% species mitochondria reads computed as",
            }
        ],
    )
    .mark_circle(opacity=0.5, size=50)
)

# correlation coefficient
# Text layer showing "r = ..." at a fixed pixel position. The regression transform is
# asked for its parameters, and r is computed as sqrt(rSquared) carrying the sign of
# coef[1] (presumably the fitted slope -- confirm against the Vega-Lite regression docs).
params_r = (
    per_species_corr_chart_base
    .transform_regression(
        "% species mitochondria reads", "% SARS2 reads", params=True,
    )
    .transform_calculate(
        r=alt.expr.if_(
            alt.datum["coef"][1] >= 0,
            alt.expr.sqrt(alt.datum["rSquared"]),
            -alt.expr.sqrt(alt.datum["rSquared"]),
        ),
        # Vega expression string that formats r to two decimals
        label='"r = " + format(datum.r, ".2f")',
    )
    .mark_text(align="left", color="orange", fontWeight=500, fontSize=12)
    .encode(
        x=alt.value(6),
        y=alt.value(8),
        text=alt.Text("label:N"),
    )
)

Now use base to make faceted chart:

In [None]:
# One panel per species.
_species_facet = alt.Facet(
    "species (common name)",
    title=None,
    header=alt.Header(labelPadding=3, labelFontSize=12, labelFontWeight=500),
)

# Scatter plus correlation label, sized for a single panel.
_faceted_panel = (
    (per_species_corr_chart_scatter_base + params_r)
    .properties(width=150, height=150)
)

per_species_corr_chart_faceted = (
    _faceted_panel
    .facet(_species_facet, columns=8, spacing=12)
    .configure(padding=25)
    .configure_axis(grid=False)
)

per_species_corr_chart_faceted.save(output_plots["per_species_corr_faceted"])

per_species_corr_chart_faceted

And use base to make single-species selection chart:

In [None]:
# Scatter plus correlation label, sized for a single stand-alone plot.
_single_panel = (
    (per_species_corr_chart_scatter_base + params_r)
    .properties(width=200, height=200)
)

# Restrict the plot to the species chosen in the dropdown.
per_species_corr_chart_single = (
    _single_panel
    .add_params(species_selection)
    .transform_filter(species_selection)
    .configure(padding=25)
    .configure_axis(grid=False)
)

per_species_corr_chart_single.save(output_plots["per_species_corr_single"])

per_species_corr_chart_single

## Overall correlations of SARS-CoV-2 and reads from species
We filter samples on total mitochondrial reads first.

In [None]:
warnings.simplefilter("ignore")


def _sars2_vs_species_correlations(group):
    """Return the Pearson and Spearman correlations between the SARS2 percent
    and the species percent for the rows in *group*."""
    sars2 = group["% SARS2 reads"]
    species = group["% species mitochondria reads"]
    return pd.Series(
        {
            "Pearson correlation": sars2.corr(species, method="pearson"),
            "Spearman correlation": sars2.corr(species, method="spearman"),
        }
    )


_corr_group_columns = [
    "species (common name)",
    "axis_scale",
    "% species mitochondria reads computed as",
]

# The read-count filter does not depend on date or source, so apply it once.
_samples_with_enough_reads = per_species_corr_df.query(
    "total_mitochondrial_reads >= @min_mito_reads"
)

# Correlations for every (date, source) combination, where "all" means no filter.
overall_corr_dfs = []
for date, source in itertools.product(
    ["all", *per_species_corr_df["Collection date"].unique()],
    ["all", *per_species_corr_df["Isolation source"].unique()],
):
    df = _samples_with_enough_reads
    if date != "all":
        df = df[df["Collection date"] == date]
    if source != "all":
        df = df[df["Isolation source"] == source]
    if df.empty:
        continue
    overall_corr_dfs.append(
        df
        .groupby(_corr_group_columns, as_index=False)
        .apply(_sars2_vs_species_correlations)
        .assign(**{"Collection date": date, "Isolation source": source})
        # keep only groups where both correlations could be computed
        .dropna(subset=["Pearson correlation", "Spearman correlation"])
    )

overall_corr_df = pd.concat(overall_corr_dfs, ignore_index=True)

overall_corr_df

In [None]:
# The three selections below rebind names used by earlier cells, this time with
# options taken from `overall_corr_df`, where "all" is an actual value of the
# "Isolation source" and "Collection date" columns.

# Dropdown choosing the isolation source; starts on "all".
source_selection = alt.selection_point(
    fields=["Isolation source"],
    bind=alt.binding_select(
        options=overall_corr_df["Isolation source"].unique(),
        name="sample isolation source",
    ),
    value="all",
)

# Dropdown choosing the collection date; starts on "all".
date_selection = alt.selection_point(
    fields=["Collection date"],
    bind=alt.binding_select(
        options=sorted(overall_corr_df["Collection date"].unique()),
        name="sample collection date",
    ),
    value="all",
)

# Radio buttons choosing a species to highlight; the `None` option is labeled "none".
species_selection = alt.selection_point(
    fields=["species (common name)"],
    bind=alt.binding_radio(
        options=[None, *sorted(overall_corr_df["species (common name)"].unique())],
        labels=["none", *sorted(overall_corr_df["species (common name)"].unique())],
        name="species to highlight",

    ),
    # with `empty=False`, an empty selection highlights no points
    empty=False,
)

# Scatter of Pearson (y) against Spearman (x) correlation, one point per species
# for the chosen source, date, axis scale, and percent definition.
overall_corr_chart = (
    alt.Chart(overall_corr_df)
    .encode(
        x=alt.X("Spearman correlation", scale=alt.Scale(domain=(-1, 1))),
        y=alt.Y("Pearson correlation", scale=alt.Scale(domain=(-1, 1))),
        # tooltip for every column except the four that the dropdowns already fix;
        # float columns are shown with three decimals
        tooltip=[
            alt.Tooltip(c, format=".3f") if overall_corr_df[c].dtype == float else c
            for c in overall_corr_df.columns.tolist()
            if c not in [
                "axis_scale",
                "Collection date",
                "Isolation source",
                "% species mitochondria reads computed as",
            ]
        ],
        # the highlighted species is drawn larger, fully opaque, and with a visible stroke
        size=alt.condition(species_selection, alt.value(100), alt.value(50)),
        strokeWidth=alt.condition(species_selection, alt.value(2), alt.value(0)),
        opacity=alt.condition(species_selection, alt.value(1), alt.value(0.5)), 
    )
    .mark_circle(color="black", size=50, stroke="red")
    .properties(width=200, height=200)
    .configure_axis(grid=False)
    # `axis_scale_selection` and `mito_percent_selection` are reused from the
    # per-species correlation cells
    .add_params(
        species_selection,
        source_selection,
        axis_scale_selection,
        date_selection,
        mito_percent_selection,
    )
    .transform_filter(source_selection)
    .transform_filter(mito_percent_selection)
    .transform_filter(axis_scale_selection)
    .transform_filter(date_selection)
)

overall_corr_chart.save(output_plots["overall_corr"])

overall_corr_chart