# Viral gene-progeny relationship
This notebook plots the number of viral genes detected in infected cells, comparing cells that contribute viral progeny with cells that do not.

Import python modules:


In [None]:
from IPython.display import display

from dms_variants.constants import CBPALETTE

import pandas as pd

import plotnine as p9

Get input/output file paths and the experiment name from the `snakemake` object:

In [None]:
# `snakemake` is not defined anywhere in this notebook; presumably it is
# injected by Snakemake when the notebook runs as part of the workflow.
smk_input = snakemake.input

# Paths of the two input CSV files.
viral_genes_by_cell_csv = smk_input.viral_genes_by_cell_csv
contributes_progeny_by_cell_csv = smk_input.contributes_progeny_by_cell_csv

# Output plot path and the experiment wildcard (shown in the plot titles).
plot = snakemake.output.plot
expt = snakemake.wildcards.expt

Style parameters. *N.b.* `CBPALETTE` is defined in imports above.

In [None]:
# Make plotnine's classic theme the default for the plots drawn below.
classic_theme = p9.theme_classic()
p9.theme_set(classic_theme)

## Load data
Load data on the viral genes detected in each cell and on whether each infected cell contributes progeny.

*N.b. viral genes CSV contains info on every cell (both infected and uninfected) in transcriptome*

In [None]:
# Read the per-cell viral gene table from the CSV path given above and
# show it for inspection.
viral_genes = pd.read_csv(filepath_or_buffer=viral_genes_by_cell_csv)
display(viral_genes)

*N.b. contributes progeny CSV only has information about **infected** cells*

In [None]:
# Read the per-cell progeny contribution table from the CSV path given
# above and show it for inspection.
contributes_progeny = pd.read_csv(
    filepath_or_buffer=contributes_progeny_by_cell_csv
)
display(contributes_progeny)

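Combine dataframes with a right merge on `cell_barcode`, so that only cells present in the contributes-progeny data are kept: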

In [None]:
# Right merge on `cell_barcode`: every row of `contributes_progeny` is kept,
# and rows of `viral_genes` whose barcode is absent from it are dropped.
infected_cells_genes_progeny = viral_genes.merge(
    contributes_progeny,
    how='right',
    on='cell_barcode',
)

display(infected_cells_genes_progeny)

Plot progeny contribution by each viral gene

In [None]:
# Three-line title; the experiment name is placed on the last line.
gene_plot_title = ('Progeny contribution and viral gene detection\n'
                   'in each infected cell\n'
                   f'{expt}')

# Figure size and text sizes for this plot.
gene_plot_theme = p9.theme(
    figure_size=(8, 4),
    plot_title=p9.element_text(size=9),
    axis_title=p9.element_text(size=9),
    legend_title=p9.element_text(size=9),
    legend_title_align='center',
)

# Bar heights are row counts for each `gene_present` value, filled by
# `contributes_progeny`. The grid has one row per `source` and one column
# per `gene`.
progeny_contribution_by_gene = (
    p9.ggplot(data=infected_cells_genes_progeny,
              mapping=p9.aes(x='gene_present', fill='contributes_progeny'))
    + p9.geom_bar(stat='count')
    + p9.facet_grid('source~gene')
    + p9.ggtitle(gene_plot_title)
    + gene_plot_theme
    # The `[0:]` slice passes the whole palette, starting at its first color.
    + p9.scale_fill_manual(CBPALETTE[0:])
)

display(progeny_contribution_by_gene)

### Progeny by viral genes per cell

Plot progeny contribution by the number of viral genes detected in each cell:

In [None]:
# Keep only the per-cell columns and drop repeated rows, so that each distinct
# (cell_barcode, source, n_viral_genes, contributes_progeny) combination is
# counted once in the histogram.
per_cell_columns = ['cell_barcode', 'source', 'n_viral_genes', 'contributes_progeny']
cells_for_histogram = (
    infected_cells_genes_progeny[per_cell_columns].drop_duplicates()
)

# Three-line title; the experiment name is placed on the last line.
histogram_title = ('Progeny contribution and number of viral genes\n'
                   'in each infected cell\n'
                   f'{expt}')

# Figure size and text sizes for this plot.
histogram_theme = p9.theme(
    figure_size=(6, 4),
    plot_title=p9.element_text(size=9),
    axis_title=p9.element_text(size=9),
    legend_title=p9.element_text(size=9),
    legend_title_align='center',
)

# Side-by-side (dodged) bars of row counts per `n_viral_genes` value, filled
# by `contributes_progeny`, with one facet row per `source`.
n_viral_genes_histogram = (
    p9.ggplot(data=cells_for_histogram,
              mapping=p9.aes(x='n_viral_genes', fill='contributes_progeny'))
    + p9.geom_bar(stat='count', position='dodge')
    + p9.facet_grid('source~')
    + p9.ggtitle(histogram_title)
    + p9.labs(x='n viral genes detected', y='n cells')
    + histogram_theme
    # The `[0:]` slice passes the whole palette, starting at its first color.
    + p9.scale_fill_manual(CBPALETTE[0:])
)

display(n_viral_genes_histogram)

## Output
Export plot and n_viral_genes per infected cell CSV.

In [None]:
# Write the n-viral-genes histogram to the output path; `verbose=False`
# silences plotnine's own messages, so the path is printed here instead.
print(f"Saving plot to {plot}")
n_viral_genes_histogram.save(filename=plot, verbose=False)