# Calculate neutralization for the strains included in the library
Here we compute the IC50-like measurements for the selections that we performed in a 96-well plate.

First, import Python modules:

In [None]:
import os
import altair as alt

import pandas as pd
import yaml
import numpy as np
import neutcurve
from neutcurve import HillCurve

from matplotlib import pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

In [None]:
# Allow more rows for Altair: lift the cap on how many rows a dataset passed to
# a chart may contain.
# The return value is bound to `_` -- presumably so that the notebook does not
# echo it as the cell's output.
_ = alt.data_transformers.disable_max_rows()

This notebook is parameterized by `papermill`. The next cell is tagged as parameters to get the passed parameters.

In [None]:
# this cell is tagged 'parameters' for `papermill` parameterization
# The values below are the defaults used when no parameters are passed in.
#   snakemake    -- left as None for a stand-alone run; later cells check it to
#                   choose the path prefix and where the per-strain table is written.
#   median_ic50s -- destination handed to `to_csv` for the per-strain table when
#                   `snakemake` is set.
snakemake = None
median_ic50s = None

In [None]:
# Choose the prefix put in front of every input and output path: paths are used
# as-is when `snakemake` is set, and are prefixed with "../../" otherwise
# (presumably because the notebook then runs from two levels below the project root).
filepath_prefix = "" if snakemake is not None else "../../"

# Load the YAML configuration that names all input and output locations
config_path = filepath_prefix + 'config.yml'
with open(config_path) as config_file:
    config = yaml.safe_load(config_file)

## Read input data for generating dataframe with fraction infectivity measurements
The configuration (`config.yml`) was read in the cell above.

Read the fraction infectivity measurements calculated from the normalized barcode runs:

In [None]:
# Load the barcode-run table and collect the distinct plate names it lists
barcode_runs_df = pd.read_csv(filepath_prefix + config['barcode_runs'])
plates = barcode_runs_df["plate"].unique().tolist()


def _read_plate_fraction_infectivity(plate):
    """Read one plate's fraction-infectivity CSV and tag its rows with the plate name."""
    plate_csv = os.path.join(
        filepath_prefix,
        config["fraction_infectivity_dir"],
        f"{plate}_fractioninfectivity.csv",
    )
    return pd.read_csv(plate_csv).assign(plate=plate)


# Stack the per-plate tables into a single dataframe
fractioninfectivity = pd.concat(
    [_read_plate_fraction_infectivity(plate) for plate in plates]
)

# Drop every row with a missing value in any column; this is how unassigned
# barcodes are removed from the fraction infectivity data
fractioninfectivity_filtered = fractioninfectivity.dropna()

In [None]:
# Cap every fraction infectivity that is greater than 1 at 1, so as to make the
# curves a bit easier to look at.
# Work on an explicit copy: without it `fractioninfectivity_fixtop` would be just
# another name for `fractioninfectivity_filtered`, so the cap would silently
# overwrite the filtered table as well, and pandas may warn about assigning into
# a slice of another dataframe.
fractioninfectivity_fixtop = fractioninfectivity_filtered.copy()
fractioninfectivity_fixtop['fraction infectivity'] = np.where(
    fractioninfectivity_fixtop['fraction infectivity'] > 1,
    1,
    fractioninfectivity_fixtop['fraction infectivity'],
)

In [None]:
# Fit curves to each of the barcoded variants for each sample.
# The input is the capped fraction-infectivity table; no column-name arguments
# are passed, so `CurveFits` uses whatever defaults it defines.
# NOTE(review): presumably one curve is fit per serum/virus/replicate
# combination -- confirm against the neutcurve documentation.
fits = neutcurve.CurveFits(fractioninfectivity_fixtop)

## Creating this dataframe from `fits.fitParams` takes a very long time; this might be improved if we could remove bad curves first.

In [None]:
# Collect the fitted parameters of the curves into one table (this call is slow).
# Later cells read its `slope`, `virus`, `serum` and `ic50` columns.
fit_parameters = fits.fitParams()

In [None]:
# Save the per-barcode fit parameters to the CSV path named in the config
titers_by_barcode_csv = os.path.join(
    filepath_prefix, config["neutralization_titers_by_barcode"]
)
fit_parameters.to_csv(titers_by_barcode_csv)

## Now we need to create a dictionary that maps each strain to its barcodes, so that we can select curves by strain when plotting

In [None]:
# Map every barcode to its strain. The "virus" column serves as the barcode
# identifier because it already contains the replicate label.
barcode_strain = dict(
    zip(fractioninfectivity_filtered["virus"], fractioninfectivity_filtered["strain"])
)

# Invert the mapping: strain -> list of its barcodes, in first-seen order
condense = {}
for barcode, strain in barcode_strain.items():
    condense.setdefault(strain, []).append(barcode)

# Confirm that we have the right number of strains in the library
output = f"There are {len(condense)} strains in the library"
print(output)

In [None]:
# Prior to averaging, keep only the curves with a positive slope (the others are
# treated as poor fits). `.copy()` makes the filtered table independent of the
# unfiltered one, so adding the `strain` column below is a plain column
# assignment rather than a write into a slice (SettingWithCopyWarning).
fit_parameters = fit_parameters.loc[fit_parameters['slope'] > 0].copy()
fit_parameters['strain'] = fit_parameters['virus'].map(barcode_strain)

# Median of every numeric fit parameter per serum and strain; NT50 is the
# reciprocal of the median IC50.
# TODO: this currently pools all barcodes of a strain, including those from
# replicates. It should instead average the per-replicate NT50s for strains that
# have replicates.
median_ic50_frombarcodes = fit_parameters.groupby(
    ['serum', 'strain'], as_index=False
).median(numeric_only=True)
median_ic50_frombarcodes['NT50'] = 1 / median_ic50_frombarcodes['ic50']

# Add more readable columns so the dataframe can be reshaped later: split the
# serum name on 'd', taking the first piece as the individual and the second
# piece as the (numeric) day.
serum_parts = median_ic50_frombarcodes['serum'].str.split('d')
median_ic50_frombarcodes['individual'] = serum_parts.str[0]
median_ic50_frombarcodes['day'] = pd.to_numeric(serum_parts.str[1])

# The per-strain table is written to disk in the final cell of the notebook.

In [None]:
# This step is quite slow: one PDF is created per individual, with one page per
# strain showing that strain's curves for all of the individual's samples.

listofselection = fits.sera
print(listofselection)

# An individual is identified by the first four characters of the serum name.
# Group the samples by exactly that prefix (rather than by a substring match
# anywhere in the name), preserving first-seen order.
toplot = {}
for serum_name in listofselection:
    toplot.setdefault(serum_name[:4], []).append(serum_name)
sera = list(toplot)

# Individuals for which the "_rep2" barcodes are plotted as well; for everyone
# else those barcodes are left out of the plots.
sera_withrep = ['D041', 'M099', 'Y044', 'D042', 'Y184', 'M131']

selection_dir = os.path.join(filepath_prefix, config["selection_dir"])

for indiv, samples in toplot.items():
    include_replicates = indiv in sera_withrep
    with PdfPages(os.path.join(selection_dir, indiv + ".pdf")) as pdf:
        for strain, barcodes in condense.items():
            if include_replicates:
                viruses_list = barcodes
            else:
                viruses_list = [x for x in barcodes if "_rep2" not in x]
            fig, _axes = fits.plotSera(
                sera=samples,
                viruses=viruses_list,
                xlabel='dilution',
                ylabel='Relative Fraction Infectivity',
                legendfontsize=16,
                legendtitle=strain,
                max_viruses_per_subplot=8,
            )
            # Save this strain's figure as one page of the PDF, then release it
            pdf.savefig(fig, bbox_inches='tight')
            plt.close(fig)
    # Make sure no figures linger before moving on to the next individual
    plt.close('all')

In [None]:
# Write the per-strain median titers.
# The run mode is tested the same way as where `filepath_prefix` is chosen
# (`is not None`), and both branches omit the dataframe index so that the CSV
# has the same columns whichever way the notebook is run.
if snakemake is not None:
    # Pipeline run: write to the destination passed in as a parameter
    median_ic50_frombarcodes.to_csv(median_ic50s, index=False)
else:
    # Stand-alone run: write to the path named in the config
    median_ic50_frombarcodes.to_csv(
        os.path.join(filepath_prefix, config["neutralization_titers_by_strain"]),
        index=False,
    )