### Mutation distribution
Read in the variants dataframe from dms-vep-pipeline-3 and plot, for each library, the distribution of the number of codon substitutions and the number of unique variants.

In [1]:
import pandas as pd

import altair as alt

import httpimport

In [2]:
# Fetch the shared Altair theme from GitHub (via httpimport) and activate it
def import_theme_new():
    """Import ``main_theme`` remotely and register it as ``"custom_theme"``.

    The module is loaded through ``httpimport.github_repo`` from the
    ``bblarsen-sci/altair_themes`` repository (``"main"``), then wrapped in a
    theme function that is registered with Altair and enabled immediately.
    """
    remote_repo = httpimport.github_repo("bblarsen-sci", "altair_themes", "main")
    with remote_repo:
        import main_theme

        def custom_theme():
            return main_theme.main_theme()

        # Explicit call form of the ``@alt.theme.register(...)`` decorator
        alt.theme.register("custom_theme", enable=True)(custom_theme)


import_theme_new()

In [None]:
# Load the variants table and drop rows whose target is "neut_standard"
variants_df = (
    pd.read_csv(snakemake.input.variants_df)
    .query('target != "neut_standard"')
)
display(variants_df)

In [None]:
# Tally variants for every (library, number of codon substitutions) pair
by_library_and_n_subs = variants_df.groupby(['library', 'n_codon_substitutions'])
counts = by_library_and_n_subs['target'].count().reset_index()
display(counts)

# Tally barcode entries for every library (count() skips missing values)
by_library = variants_df.groupby(['library'])
n_variants = by_library['barcode'].count().reset_index()
display(n_variants)

In [None]:
# Print, for each library, the fraction of variants with 0-5 codon substitutions
# followed by the library's total number of variants.
calculations = []
for library in counts['library'].unique():
    # Index this library's counts by number of codon substitutions once, so the
    # inner loop is a plain label lookup instead of a repeated dataframe query.
    library_counts = counts.loc[counts['library'] == library].set_index(
        'n_codon_substitutions'
    )['target']
    total = library_counts.sum()
    for n_muts in [0, 1, 2, 3, 4, 5]:
        # A library may contain no variants with this many substitutions;
        # report 0 instead of raising IndexError on an empty selection.
        n = library_counts.get(n_muts, 0)
        print(f"Fraction of {n_muts} mutations in {library}: {n/total:.1%}")
    print(f'Total variants in {library}: {total}')

In [None]:
# Grouped bar chart of variant counts by number of codon substitutions,
# with one bar per library inside each substitution-count group.
# NOTE(review): the x domain lists 0-4 only, while the printed summary covers
# 0-5 — confirm that excluding higher substitution counts here is intended.
mutations_axis = alt.X(
    "n_codon_substitutions:N",
    title=["Number of mutations"],
    scale=alt.Scale(domain=[0, 1, 2, 3, 4]),
)
# Stacking is disabled; libraries are separated with xOffset instead
variants_axis = alt.Y("target:Q", title="Number of variants").stack(None)
library_color = alt.Color("library:N", title="Library")

mutation_distribution_chart = (
    alt.Chart(counts)
    .mark_bar()
    .encode(
        x=mutations_axis,
        y=variants_axis,
        color=library_color,
        xOffset=alt.XOffset("library:N"),
    )
    .properties(width=alt.Step(10), height=200)
)

# Export a 300 ppi PNG and an SVG of the same chart
mutation_distribution_chart.save(snakemake.output.chart_png, ppi=300)
mutation_distribution_chart.save(snakemake.output.chart_svg)

In [None]:
# Bar chart of the per-library barcode counts held in n_variants
library_axis = alt.X("library:N", title="Library")
barcode_axis = alt.Y("barcode:Q", title="Number of Barcodes")

plot_number_barcodes = (
    alt.Chart(n_variants)
    .mark_bar(fill="#b8b0ac")
    .encode(x=library_axis, y=barcode_axis)
    .properties(width=alt.Step(10), height=200)
)

# Export a 300 ppi PNG and an SVG of the same chart
plot_number_barcodes.save(snakemake.output.barcodes_png, ppi=300)
plot_number_barcodes.save(snakemake.output.barcodes_svg)
