### This notebook analyzes ephrin neutralization of Nipah pseudovirus

In [None]:
# this cell is tagged as parameters for `papermill` parameterization
# Every name below is a placeholder; the real values are expected to be
# injected by papermill before the rest of the notebook runs.

# Path to a Python file that is exec'd to configure Altair (optional), and
# path to the project YAML config.
altair_config = None
nipah_config = None

# Input CSVs with the neutralization measurements that get curve-fitted.
ephrin_binding_neuts_file = None
ephrin_validation_curves = None

# Input CSV with the IC50s used for the correlation plots.
validation_ic50s_file = None

# Input CSVs with the binding scores that are merged on `mutation`.
e2_monomeric_binding_file = None
e3_dimeric_binding_file = None

# Output paths handed to `Chart.save(...)` for each figure.
ephrin_curve_plot = None
e2_curve_plot = None
e3_curve_plot = None
e2_corr_plot = None
e3_corr_plot = None

In [None]:
import warnings
import math
import os

from IPython.display import display, HTML, SVG

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

import neutcurve
from neutcurve.colorschemes import CBPALETTE
from neutcurve.colorschemes import CBMARKERS
import scipy.stats

import yaml

import altair as alt
import re

print(f"Using `neutcurve` version {neutcurve.__version__}")

In [None]:
# allow more rows for Altair
_ = alt.data_transformers.disable_max_rows()

# The file paths used by this notebook are relative to the project root, so
# make sure that is the current working directory.
project_dir = "/fh/fast/bloom_j/computational_notebooks/blarsen/2023/Nipah_Malaysia_RBP_DMS/"

# `os.getcwd()` does not include a trailing slash (except for the root
# directory), so a raw string comparison against `project_dir` could never
# match. Normalize both sides before comparing.
if os.path.normpath(os.getcwd()) == os.path.normpath(project_dir):
    print("Already in correct directory")
else:
    os.chdir(project_dir)
    print("Setup in correct directory")

In [None]:
#altair_config = 'data/custom_analyses_data/theme.py'
#nipah_config = 'nipah_config.yaml'
#ephrin_binding_neuts_file = 'data/custom_analyses_data/experimental_data/bat_ephrin_neuts.csv'
#ephrin_validation_curves = 'data/custom_analyses_data/experimental_data/binding_single_mutant_validations.csv'
#validation_ic50s_file = 'data/custom_analyses_data/experimental_data/receptor_IC_validations.csv'
#e2_monomeric_binding_file = 'results/receptor_affinity/averages/EFNB2_monomeric_mut_effect.csv'
#e3_dimeric_binding_file = 'results/receptor_affinity/averages/EFNB3_dimeric_mut_effect.csv'

#ephrin_curve_plot
#e2_curve_plot
#e3_curve_plot
#e2_corr_plot
#e3_corr_plot

In [None]:
# Optionally run the Python file named by `altair_config` (presumably an
# Altair theme definition — confirm against the file itself).
# NOTE(review): `exec` runs the file's contents as code in this notebook's
# namespace, so `altair_config` must only ever point at a trusted file.
if altair_config:
    with open(altair_config, 'r') as file:
        exec(file.read())

# Parse the project YAML configuration into `config`.
with open(nipah_config) as f:
    config = yaml.safe_load(f)

### First plot neuts with WT virus

In [None]:
# Expand the abbreviated receptor labels stored in the `virus` column.
receptor_names = {
    'E2-dimeric': 'Ephrin-B2-dimeric',
    'E2-monomeric': 'Ephrin-B2-monomeric',
    'E3-dimeric': 'Ephrin-B3-dimeric',
    'E3-monomeric': 'Ephrin-B3-monomeric',
}

df = pd.read_csv(ephrin_binding_neuts_file)
df['virus'] = df['virus'].replace(receptor_names)

# Fit the curves. `fits` is read as a module-level name by
# `extract_dataframe_from_neutcurve`, and is rebound later in the notebook.
fits = neutcurve.curvefits.CurveFits(
    data=df,
    fixbottom=0,
    #max_sera_per_subplot=10
)

# Table of fit parameters including the requested inhibitory concentrations.
fitParams = fits.fitParams(ics=[50, 90, 95, 97, 98, 99])

def extract_dataframe_from_neutcurve(serum, viruses, replicate='average'):
    """Stack the curve data frames for several viruses against one serum.

    Curves are looked up in the module-level `fits` object, so the result
    depends on whichever `fits` is bound when this function is called.

    Args:
        serum: value passed as `serum=` to `fits.getCurve`.
        viruses: iterable of values passed one at a time as `virus=`.
        replicate: value passed as `replicate=` (default `'average'`).

    Returns:
        A single DataFrame made by row-wise concatenation of each curve's
        `dataframe()`, with a `virus` column identifying the source curve.
    """
    def _labelled_curve(virus_name):
        # Fetch one curve and tag its rows with the virus it belongs to.
        frame = fits.getCurve(
            serum=serum, virus=virus_name, replicate=replicate
        ).dataframe()
        frame['virus'] = virus_name
        return frame

    return pd.concat([_labelled_curve(name) for name in viruses], axis=0)

# Pull out the neuts that were done on CHO-EFNB3 cells, not E2
serum = 'CHO-EFNB3'
viruses = [
    'Ephrin-B2-dimeric',
    'Ephrin-B2-monomeric',
    'Ephrin-B3-dimeric',
    'Ephrin-B3-monomeric',
]

curve = extract_dataframe_from_neutcurve(serum, viruses)

# Error-bar bounds: measurement plus / minus the `stderr` column.
curve = curve.assign(
    upper=curve['measurement'] + curve['stderr'],
    lower=curve['measurement'] - curve['stderr'],
)

def plot_neut_curve(df):
    """Build a layered Altair chart of neutralization curves.

    Three layers are overlaid: the fitted curve (`fit`) as a line, the raw
    points (`measurement`) as circles, and error bars spanning the `lower`
    to `upper` columns. All layers are colored by the `virus` column.

    Args:
        df: DataFrame with `concentration`, `fit`, `measurement`, `lower`,
            `upper` and `virus` columns.

    Returns:
        The layered Altair chart (line + circles + error bars).
    """
    y_title = 'Fraction Infectivity'

    def _log_concentration(field):
        # Shared log-scaled x encoding; only the field shorthand differs.
        return alt.X(
            field,
            scale=alt.Scale(type='log'),
            axis=alt.Axis(format='.0e'),
            title='Concentration (μM)',
        )

    def _sized(layer):
        # Both the line and the circle layers carry the same dimensions.
        return layer.properties(height=alt.Step(20), width=300)

    fit_line = _sized(
        alt.Chart(df)
        .mark_line(size=1.5, opacity=1)
        .encode(
            x=_log_concentration('concentration:Q'),
            y=alt.Y('fit:Q', title=y_title),
            color=alt.Color('virus', title='Receptor'),
        )
    )

    points = _sized(
        alt.Chart(df)
        .mark_circle(size=50, opacity=1)
        .encode(
            x=_log_concentration('concentration'),
            y=alt.Y('measurement:Q', title=y_title),
            color=alt.Color('virus', title='Receptor'),
        )
    )

    error_bars = (
        alt.Chart(df)
        .mark_errorbar(opacity=1)
        .encode(
            x='concentration',
            y=alt.Y('lower', title=y_title),
            y2='upper',
            color='virus',
        )
    )

    return fit_line + points + error_bars


# Draw the WT neutralization curves, show them inline and write them out.
ephrin_curve = plot_neut_curve(curve)
ephrin_curve.display()
ephrin_curve.save(ephrin_curve_plot)
#ephrin_curve.save('ephrin_curve.json')

# Relabel the generic neutcurve columns and add the IC50 scaled by 1000.
fitParams = fitParams.rename(
    columns={'serum': 'target_cells', 'virus': 'soluble_receptor'}
).assign(ic50_nM=lambda params: params['ic50'] * 1000)

# Show only the fits measured on CHO-EFNB3 target cells.
subset = fitParams.loc[fitParams['target_cells'] == 'CHO-EFNB3']
display(subset.loc[:, ['soluble_receptor', 'ic50_nM']])

### Pull in binding validation neut data

In [None]:
validation_curves = pd.read_csv(ephrin_validation_curves)

# NOTE: this rebinds the module-level `fits` and `fitParams`, replacing the
# objects created for the WT neuts above; `extract_dataframe_from_neutcurve`
# reads whichever `fits` is current.
fits = neutcurve.curvefits.CurveFits(
    data=validation_curves,
    fixbottom=0,
    #max_sera_per_subplot=10
)

fitParams = (
    fits.fitParams(ics=[50, 90, 95, 97, 98, 99])
    .rename(columns={'serum': 'target_cells', 'virus': 'mutant'})
)
#display(fitParams)
def extract_dataframe_from_neutcurve(serum, viruses, replicate='average'):
    """Stack the curve data frames for several viruses against one serum.

    Curves are looked up in the module-level `fits` object, so the result
    depends on whichever `fits` is bound when this function is called.

    Args:
        serum: value passed as `serum=` to `fits.getCurve`.
        viruses: iterable of values passed one at a time as `virus=`.
        replicate: value passed as `replicate=` (default `'average'`).

    Returns:
        A single DataFrame made by row-wise concatenation of each curve's
        `dataframe()`, with a `virus` column identifying the source curve.
    """
    def _labelled_curve(virus_name):
        # Fetch one curve and tag its rows with the virus it belongs to.
        frame = fits.getCurve(
            serum=serum, virus=virus_name, replicate=replicate
        ).dataframe()
        frame['virus'] = virus_name
        return frame

    return pd.concat([_labelled_curve(name) for name in viruses], axis=0)

def get_curves(df,serum):
    """Return the stacked validation curves for one serum with error bounds.

    Args:
        df: not used by the function body; kept so existing calls still work.
        serum: serum name forwarded to `extract_dataframe_from_neutcurve`.

    Returns:
        DataFrame of curves for the fixed list of validated mutants plus WT,
        with `upper` / `lower` columns equal to `measurement` +/- `stderr`.
    """
    validated_viruses = ['H333Q', 'Q492R', 'V507I', 'Q530F', 'S553W', 'D555K', 'WT']
    stacked = extract_dataframe_from_neutcurve(serum, validated_viruses)

    spread = stacked['stderr']
    stacked['upper'] = stacked['measurement'] + spread
    stacked['lower'] = stacked['measurement'] - spread
    return stacked

# Build one curve table per value of `serum` in the validation fits. The first
# argument is accepted by `get_curves` but not used inside it.
e2_validation_curves = get_curves(validation_curves,'EFNB2-monomeric')
e3_validation_curves = get_curves(validation_curves,'EFNB3-dimeric')

# Sorting function to put 'WT' on top of the legend, followed by numerical order
def custom_sort_order(array):
    """Return the labels as a new list: 'WT' first, then by embedded number.

    Labels are ordered by the first run of digits they contain (e.g. 530 for
    'Q530F'); labels without digits sort as 0. The input is not modified.
    """
    def site_number(label):
        # First run of digits in the label, or 0 when there is none.
        found = re.search(r'\d+', label)
        if found is None:
            return 0
        return int(found.group())

    ordered = sorted(array, key=site_number)

    # Promote the first 'WT' entry (if any) to the head of the list.
    try:
        wt_index = ordered.index('WT')
    except ValueError:
        return ordered
    ordered.insert(0, ordered.pop(wt_index))
    return ordered

def plot_validation_curves(df,name):
    """Build a layered Altair chart of the mutant validation curves.

    Three layers are overlaid: the fitted curve (`fit`) as a line, the raw
    points (`measurement`) as circles, and error bars spanning `lower` to
    `upper`. The legend order comes from `custom_sort_order`; the first entry
    is drawn in black and the rest take colors from a fixed palette.

    Args:
        df: DataFrame with `concentration`, `fit`, `measurement`, `lower`,
            `upper` and `virus` columns.
        name: label used in the x-axis title.

    Returns:
        The layered Altair chart (line + circles + error bars).
    """
    # Define the category10 colors manually
    palette = ['#4E79A5', '#F18F3B', '#E0585B', '#77B7B2', '#5AA155', '#EDC958', '#AF7AA0', '#FE9EA8', '#9C7561', '#BAB0AC']

    viruses = df['virus'].unique()
    # One black slot followed by as many palette colors as remaining viruses.
    colors = ['black'] + palette[:len(viruses) - 1]

    y_title = 'Fraction Infectivity'

    def _log_concentration(field):
        return alt.X(
            field,
            scale=alt.Scale(type='log'),
            axis=alt.Axis(format='.0e', tickCount=3),
            title=f'{name} conc. (μg/mL)',
        )

    def _virus_color():
        return alt.Color(
            'virus',
            title='Virus',
            scale=alt.Scale(domain=custom_sort_order(viruses), range=colors),
        )

    def _sized(layer):
        return layer.properties(height=alt.Step(10), width=300)

    fit_line = _sized(
        alt.Chart(df)
        .mark_line(size=1, opacity=1)
        .encode(
            x=_log_concentration('concentration:Q'),
            y=alt.Y('fit:Q', title=y_title, axis=alt.Axis(tickCount=3)),
            color=_virus_color(),
        )
    )

    points = _sized(
        alt.Chart(df)
        .mark_circle(size=50, opacity=1)
        .encode(
            x=_log_concentration('concentration'),
            y=alt.Y('measurement:Q', title=y_title, axis=alt.Axis(tickCount=3)),
            color=_virus_color(),
        )
    )

    error_bars = (
        alt.Chart(df)
        .mark_errorbar(opacity=1)
        .encode(
            x='concentration',
            y=alt.Y('lower', title=y_title),
            y2='upper',
            color='virus',
        )
    )

    return fit_line + points + error_bars

# Validation curves for the EFNB2-monomeric fits: build, show inline, then
# write to the papermill-supplied output path.
ephrin_b2_neut_curve = plot_validation_curves(e2_validation_curves,'Ephrin-B2-monomeric')
ephrin_b2_neut_curve.display()
ephrin_b2_neut_curve.save(e2_curve_plot)
#ephrin_b2_neut_curve.save('ephrinb2.json')

# Same for the EFNB3-dimeric fits.
ephrin_b3_neut_curve = plot_validation_curves(e3_validation_curves,'Ephrin-B3-dimeric')
ephrin_b3_neut_curve.display()
ephrin_b3_neut_curve.save(e3_curve_plot)
#ephrin_b3_neut_curve.save('ephrinb3.json')

# Find the IC50 values in ng/mL and relative IC50 to WT
fitParams['ic50_ng'] = (fitParams['ic50'] * 1000).round(1)

# Kept for backward compatibility with any later cell that reads these names.
efnb2 = fitParams[fitParams['target_cells'] == 'EFNB2-monomeric']
WT_IC50 = efnb2[efnb2['mutant'] == 'WT']
wt50 = WT_IC50['ic50_ng'].unique()

# Normalize each mutant to the WT IC50 measured with the *same* receptor.
# Dividing every row by the EFNB2-monomeric WT value would make the
# EFNB3-dimeric ratios relative to the wrong reference.
wt_ic50_by_receptor = (
    fitParams.loc[fitParams['mutant'] == 'WT']
    .groupby('target_cells')['ic50_ng']
    .first()
)
fitParams['relative_ic50'] = (
    fitParams['ic50_ng'] / fitParams['target_cells'].map(wt_ic50_by_receptor)
).round(1)
display(fitParams[['target_cells','mutant','ic50_ng','relative_ic50']])

### Now plot correlations between IC50 and DMS binding score

Pull in data and prepare for plotting

In [None]:
# Load the measured IC50s and the two binding-score tables.
# NOTE(review): `na_filter` is documented as a bool; `None` is presumably
# meant as `False` (no NA detection while parsing) — confirm the intent.
validation_ic50s = pd.read_csv(validation_ic50s_file,na_filter=None)
e2_monomeric_binding = pd.read_csv(e2_monomeric_binding_file)
e3_dimeric_binding = pd.read_csv(e3_dimeric_binding_file)

def make_df(df,name):
    """Join the measured IC50s to binding scores for one receptor.

    Reads the module-level `validation_ic50s` table, merges it with `df` on
    `mutation` (pandas' default inner join), then appends the WT rows with an
    `Ephrin binding_median` of 0 and keeps only rows whose `antibody` column
    equals `name`.

    Args:
        df: binding-score DataFrame with a `mutation` column.
        name: value of the `antibody` column to keep.

    Returns:
        A new, independent DataFrame. It is an explicit copy so that callers
        can add columns to it without pandas raising SettingWithCopyWarning.
    """
    merged = validation_ic50s.merge(df,on=['mutation'])

    # WT rows are added separately with a binding score of 0 (presumably
    # because WT has no row in the mutation-effect table — confirm).
    wt_rows = validation_ic50s[validation_ic50s['mutation'] == 'WT'].copy()
    wt_rows['Ephrin binding_median'] = 0.00000
    merged = pd.concat([merged, wt_rows], ignore_index=True)

    return merged[merged['antibody'] == name].copy()

# One merged IC50 / binding-score table per receptor, selected by the value
# of the `antibody` column.
e2_df_out = make_df(e2_monomeric_binding,'EFNB2-monomeric')
e3_df_out = make_df(e3_dimeric_binding,'EFNB3-dimeric')

Plot correlation data:

In [None]:
# Sorting function to put 'WT' on top of the legend, followed by numerical order
def custom_sort_order(array):
    """Return the labels as a new list: 'WT' first, then by embedded number.

    Labels are ordered by the first run of digits they contain (e.g. 530 for
    'Q530F'); labels without digits sort as 0. The input is not modified.
    """
    def site_number(label):
        # First run of digits in the label, or 0 when there is none.
        found = re.search(r'\d+', label)
        if found is None:
            return 0
        return int(found.group())

    ordered = sorted(array, key=site_number)

    # Promote the first 'WT' entry (if any) to the head of the list.
    try:
        wt_index = ordered.index('WT')
    except ValueError:
        return ordered
    ordered.insert(0, ordered.pop(wt_index))
    return ordered

def make_corr_chart(df,name):
    """Scatter NT50 against the binding score and annotate the r value.

    Adds an `NT50` column (1 / `measured IC50`) to `df` in place, prints the
    r value from a linear regression of `NT50` on `Ephrin binding_median`,
    and returns a scatter plot with that r value drawn as text.

    Args:
        df: DataFrame with `measured IC50`, `Ephrin binding_median` and
            `mutation` columns. It gains an `NT50` column as a side effect.
        name: label used in the y-axis title.

    Returns:
        The layered Altair chart (points + text annotation).
    """
    # calculate NT50
    df['NT50'] = 1 / df['measured IC50']

    # calculate R value; only the correlation coefficient is needed
    binding = df['Ephrin binding_median']
    r_value = scipy.stats.linregress(binding, df['NT50']).rvalue
    print(f'The r-value is: {r_value:.2f}')

    # Define the category10 colors manually
    palette = ['#4E79A5', '#F18F3B', '#E0585B', '#77B7B2', '#5AA155', '#EDC958', '#AF7AA0', '#FE9EA8', '#9C7561', '#BAB0AC']

    mutations = df['mutation'].unique()
    # One black slot followed by as many palette colors as remaining mutants.
    colors = ['black'] + palette[:len(mutations) - 1]

    points = (
        alt.Chart(df)
        .mark_point(filled=True, size=100, opacity=1)
        .encode(
            x=alt.X("Ephrin binding_median", title="DMS binding Score"),
            y=alt.Y(
                "NT50",
                title=f"{name} NT50 (μg/ml-1)",
                scale=alt.Scale(type="log", domain=[0.1, 100]),
            ),
            color=alt.Color(
                'mutation',
                title='Mutant',
                scale=alt.Scale(domain=custom_sort_order(mutations), range=colors),
            ),
        )
        .properties(width=300, height=300)
    )

    # Place the r-value label at the smallest x and largest y in the data.
    label = {
        'x': df['Ephrin binding_median'].min(),
        'y': df['NT50'].max(),
        'text': f'r = {r_value:.2f}',
    }
    annotation = (
        alt.Chart({'values': [label]})
        .mark_text(align='left', baseline='top', dx=5)
        .encode(x=alt.X('x:Q'), y=alt.Y('y:Q'), text='text:N')
    )

    return points + annotation

# Correlation plot for the EFNB2-monomeric table: build (this also prints the
# r value and adds an `NT50` column to `e2_df_out`), show inline, then save.
e2_corr = make_corr_chart(e2_df_out,'Ephrin-B2-monomeric')
e2_corr.display()
e2_corr.save(e2_corr_plot)
#e2_corr.save('e2_corr.json')

# Same for the EFNB3-dimeric table.
e3_corr = make_corr_chart(e3_df_out,'Ephrin-B3-dimeric')
e3_corr.display()
e3_corr.save(e3_corr_plot)
#e3_corr.save('e3_corr.json')

In [None]:
# Show the two correlation charts side by side (horizontal concatenation).
(e2_corr | e3_corr).display()