# Calculate neutralization for the strains included in the library
Here we compute the  IC50-like measurements for the selections that we performed in a 96-well plate

First, import Python modules:

In [1]:
import os
import altair as alt

import pandas as pd
import yaml
import numpy as np
import neutcurve
from neutcurve import HillCurve

from matplotlib import pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

In [2]:
# allow more rows for Altair
_ = alt.data_transformers.disable_max_rows()

Change working directory to top directory of repo:

In [3]:
os.chdir('../../')
#os.chdir('/fh/fast/bloom_j/computational_notebooks/aloes/2023/NGS_Neut_Assay')
os.getcwd()

'/fh/fast/bloom_j/computational_notebooks/aloes/2023/NGS_Neut_Assay'

## Read input data for generating dataframe with fraction infectivity measurements
Read configuration:

In [4]:
with open('config.yml') as f:
    config = yaml.safe_load(f)

Read the fraction infectivity measurements calculated from the normalized barcode runs:

In [5]:
fractioninfectivity = pd.read_csv(config["fraction_infectivity_file"])
fractioninfectivity

Unnamed: 0.1,Unnamed: 0,serum,barcode,concentration,fraction infectivity,strain,sample,replicate,virus
0,0,D002d0,CGTTTAAACAATGAAG,0.000003,0.938447,A/India-Pune-Nivcov2221170/2022,pdmH1N1_lib2022-D002d0-393660.0-1-230801-Plate1,rep1,CGTTTAAACAATGAAG_rep1
1,1,D002d0,AGTGTCCCTAAGAGGC,0.000003,1.338317,A/Bangladesh/8002/2021,pdmH1N1_lib2022-D002d0-393660.0-1-230801-Plate1,rep1,AGTGTCCCTAAGAGGC_rep1
2,2,D002d0,CTGCACGAGAGACTTC,0.000003,1.268718,A/Perth/1/2022,pdmH1N1_lib2022-D002d0-393660.0-1-230801-Plate1,rep1,CTGCACGAGAGACTTC_rep1
3,3,D002d0,GTCCGTTGATAAAGAG,0.000003,1.636438,A/Cote_DIvoire/1448/2021,pdmH1N1_lib2022-D002d0-393660.0-1-230801-Plate1,rep1,GTCCGTTGATAAAGAG_rep1
4,4,D002d0,ATACCTCAACCTTGAA,0.000003,1.094941,A/Bangladesh/8036/2021,pdmH1N1_lib2022-D002d0-393660.0-1-230801-Plate1,rep1,ATACCTCAACCTTGAA_rep1
...,...,...,...,...,...,...,...,...,...
105595,108472,Y184d30,AACGAATGAATTTCTT,0.000008,1.246951,A/Togo/0274/2021,pdmH1N1_lib2022-Y184d30-131220.0-2-230926-Plate12,rep2,AACGAATGAATTTCTT_rep2
105596,108473,Y184d30,ACGGAATCCCCTGAGA,0.000008,0.491026,A/Washington/23/2020,pdmH1N1_lib2022-Y184d30-131220.0-2-230926-Plate12,rep2,ACGGAATCCCCTGAGA_rep2
105597,108474,Y184d30,GCAATCCCGCAATTTG,0.000008,0.821261,A/Ghana/2080/2020,pdmH1N1_lib2022-Y184d30-131220.0-2-230926-Plate12,rep2,GCAATCCCGCAATTTG_rep2
105598,108475,Y184d30,GCCGGAGGGCATTTTC,0.000008,1.331956,A/Belgium/H0038/2022,pdmH1N1_lib2022-Y184d30-131220.0-2-230926-Plate12,rep2,GCCGGAGGGCATTTTC_rep2


In [6]:
# Here we are fixing setting every fraction infectivity that is greater than 1 to 1, so as to make the curves a bit easier to look at.
fractioninfectivity_fixtop = fractioninfectivity
fractioninfectivity_fixtop['fraction infectivity'] = np.where(fractioninfectivity_fixtop['fraction infectivity']>1, 1,fractioninfectivity_fixtop['fraction infectivity'])

In [7]:
# Fit curves to each of the barcoded variants for each sample
fits = neutcurve.CurveFits(fractioninfectivity_fixtop)

In [8]:
fit_parameters = fits.fitParams()

  return b + (t - b) / (1 + (c / m)**s)
  return b + (t - b) / (1 + (c / m)**s)


In [85]:
# Create directory for selection results
#os.mkdir(config["selection_dir"])

FileExistsError: [Errno 17] File exists: 'results/selections'

In [9]:
fit_parameters.to_csv(config["neutralization_titers_by_barcode"])

## Now we need to create a dictionary of barcoded strains such that we can call by strain to plot

In [None]:
# Generate a dictionary of barcodes and variants.
# We are using "virus" column here as it already contains the replicate label
barcode_strain = dict(zip(fractioninfectivity.virus,fractioninfectivity.strain))

condense = {}
for key,value in barcode_strain.items():
    if value not in condense:
        condense[value] = []
        condense[value].append(key)
    else:
        condense[value].append(key)
               
#To confirm that we have the right number of strains in the library:
output = 'There are ' + str(len(condense)) + ' strains in the library'
print(output)

In [None]:
#Prior to averaging, remove things that have poor slopes
fit_parameters = fit_parameters.loc[fit_parameters['slope']>0]
fit_parameters['strain'] = fit_parameters['virus'].map(barcode_strain)

#Find median NT50 for each strain for each individual
median_ic50_frombarcodes = fit_parameters.groupby(['serum','strain'], as_index=False).median(numeric_only=True)
median_ic50_frombarcodes['NT50'] = 1 / median_ic50_frombarcodes['ic50']

#Also adding in some more reasonable names for columns so that I can transform the dataframe to show columns
median_ic50_frombarcodes['individual'] =  median_ic50_frombarcodes['serum'].str.split('d').str[0]
median_ic50_frombarcodes['day'] =  median_ic50_frombarcodes['serum'].str.split('d').str[1]
median_ic50_frombarcodes['day'] = pd.to_numeric(median_ic50_frombarcodes['day'])

median_ic50_frombarcodes.to_csv(config["neutralization_titers_by_strain"])

In [None]:
%%time
# This step is quite slow, we are creating a pdf for each serum sample, then outputing all the curves for all timepoints to the file. 

listofselection = fits.sera
print(listofselection)
sera = []
for i in listofselection:
    if i[0:4] not in sera:
        sera.append(i[0:4])
#sera = ['M099']
sera_withrep = ['D041','M099','Y044','D042','Y184','M131']
toplot = {}
for indiv in sera:
    samples = [m for m in listofselection if indiv in m]
    toplot[indiv] = samples

for indiv in toplot:
    with PdfPages(config["selection_dir"]+"/"+indiv+".pdf") as pdf:
        if indiv in sera_withrep:
            for i in condense:
                fig, axes = fits.plotSera(sera=toplot[indiv], viruses=condense[i],xlabel='dilution',ylabel='Relative Fraction Infectivity',legendfontsize=16,legendtitle=i,max_viruses_per_subplot=8)
                pdf.savefig(bbox_inches='tight')  # saves the current figure into a pdf page
                plt.close()
        else:
            for i in condense:
                viruses_list =  [x for x in condense[i] if "_rep2" not in x]
                fig, axes = fits.plotSera(sera=toplot[indiv], viruses=condense[i],xlabel='dilution',ylabel='Relative Fraction Infectivity',legendfontsize=16,legendtitle=i,max_viruses_per_subplot=8)
                pdf.savefig(bbox_inches='tight')  # saves the current figure into a pdf page
                plt.close()
    plt.close('all')