# Process sequencing-based neutralization assay plate
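This notebook is designed to be run using `snakemake`, which provides the `snakemake` object that supplies the input files, output files, parameters, and wildcards used below.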

## Setup
Import Python modules:

In [1]:
import altair as alt

import neutcurve

import pandas as pd

# Turn off Altair's max-rows check so larger data frames can be plotted;
# the result is assigned to `_` so the cell does not display a return value.
_ = alt.data_transformers.disable_max_rows()

Get the variables passed by `snakemake`:

In [3]:
# Input files provided by the workflow rule
count_csvs = snakemake.input.count_csvs
fate_csvs = snakemake.input.fate_csvs
viral_library_csv = snakemake.input.viral_library_csv
neut_standard_set_csv = snakemake.input.neut_standard_set_csv

# Output path declared by the workflow rule
frac_infectivity_csv = snakemake.output.frac_infectivity_csv

# Plate being processed (a rule wildcard) and the parameters supplied for it
plate = snakemake.wildcards.plate
samples = snakemake.params.samples
plate_params = snakemake.params.plate_params
samples_df = plate_params["samples"]

# The sample list, both per-sample CSV lists, and the sample table must all
# have the same length
assert all(
    len(per_sample) == len(samples)
    for per_sample in (count_csvs, fate_csvs, samples_df)
)

print(f"Processing {plate=}")

Processing plate='plate9'


## Statistics on barcode-parsing for each sample
Plot the "fates" of the sequencing reads parsed for each sample on the plate:

In [8]:
# Build one table of read "fates" for the plate:
#  1. read each fate CSV and tag its rows with the corresponding sample,
#  2. merge in the per-sample columns from `samples_df` (its `sample` keys
#     must be unique, enforced by `validate="many_to_one"`),
#  3. total the counts of each fate over all rows and keep only fates whose
#     total is above zero,
#  4. keep just the columns of interest.
fates = (
    pd.concat(
        [
            pd.read_csv(fate_csv).assign(sample=sample)
            for fate_csv, sample in zip(fate_csvs, samples)
        ]
    )
    .merge(samples_df, on="sample", validate="many_to_one")
    .assign(
        fate_counts=lambda df: df.groupby("fate")["count"].transform("sum"),
    )
    .loc[
        lambda df: df["fate_counts"] > 0,
        ["fate", "count", "serum", "sample_noplate", "dilution_factor"],
    ]
)

# No row should appear more than once
assert not fates.duplicated().any()

fates

Unnamed: 0,fate,count,serum,sample_noplate,dilution_factor
0,valid barcode,403240,M121d0,M121d0_20,20
1,low quality barcode,65505,M121d0,M121d0_20,20
2,unparseable barcode,44859,M121d0,M121d0_20,20
5,valid barcode,1649517,M121d0,M121d0_60,60
6,low quality barcode,265894,M121d0,M121d0_60,60
...,...,...,...,...,...
471,low quality barcode,468483,none,none-15,
472,unparseable barcode,41109,none,none-15,
475,valid barcode,1485582,none,none-16,
476,low quality barcode,244410,none,none-16,
