# Plot NT50 profiles for individually-run sera

In [1]:
# Import packages
import os
import altair as alt
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Ignore error message from Altair about large dataframes
_ = alt.data_transformers.disable_max_rows()

In [2]:
# Basic color palette, keyed by color name; the plots consume it as an ordered
# list of hex codes (blue, teal, red, yellow).
_palette_by_name = {
    'blue': '#345995',
    'teal': '#03cea4',
    'red': '#ca1551',
    'yellow': '#eac435',
}
color_palette = list(_palette_by_name.values())

In [3]:
# Input / output directories (created on first run if they are missing)
datadir = '../data'
resultsdir = '../results'
for directory in (datadir, resultsdir):
    os.makedirs(directory, exist_ok=True)

# SCH titers: barcode is the third '_'-separated field of the serum ID
SCH_titers = pd.read_csv('../../../results/aggregated_titers/titers_SCH.csv')
SCH_titers = SCH_titers.assign(
    barcode=SCH_titers['serum'].str.split('_').str[2],
)

# Penn vaccine cohort titers: barcode is the third '_'-separated field; the
# timepoint is 'd' plus the text that follows the first 'd' in the serum ID.
# NOTE(review): this assumes the serum ID contains exactly one 'd' -- confirm.
titers_PennVaccineCohort = pd.read_csv('../../../results/aggregated_titers/titers_PennVaccineCohort.csv')
titers_PennVaccineCohort = titers_PennVaccineCohort.assign(
    barcode=titers_PennVaccineCohort['serum'].str.split('_').str[2],
    timepoint='d' + titers_PennVaccineCohort['serum'].str.split('d').str[1],
)

# Australian MA22 vaccine cohort titers: barcode and timepoint are the second
# and third '_'-separated fields of the serum ID
Australia_MA22_titers = pd.read_csv('../../../results/aggregated_titers/titers_AusVaccineCohort.csv')
Australia_MA22_titers = Australia_MA22_titers.assign(
    barcode=Australia_MA22_titers['serum'].str.split('_').str[1],
    timepoint=Australia_MA22_titers['serum'].str.split('_').str[2],
)

# Stack the three cohorts into a single dataframe (row indices are not reset)
all_titers = pd.concat([SCH_titers, titers_PennVaccineCohort, Australia_MA22_titers])

# Add a more informative 'group_detail' column: rows whose group is 'SCH' keep
# the bare group name, every other row gets '<group>_<timepoint>'
is_sch = all_titers['group'] == 'SCH'
group_with_timepoint = all_titers['group'].astype(str) + '_' + all_titers['timepoint']
all_titers['group_detail'] = np.where(is_sch, all_titers['group'], group_with_timepoint)

In [4]:
# Define virus order (the order in which strains are laid out on the x-axis)
viral_plot_order = pd.read_csv('../../../data/H3N2library_2023-2024_strain_order.csv')
virus_order = list(viral_plot_order['strain'])

# Define vaccine strains: one strain name per line. Any line containing
# 'strain' is treated as the header and skipped. Surrounding whitespace
# (including a '\r' left by CRLF line endings) is stripped and blank lines are
# dropped, so the names compare equal to the 'virus' values used for filtering.
with open('../data/vaccine_strains.csv') as f:
    vaccine_strains = [
        name
        for name in (raw_line.strip() for raw_line in f)
        if name and 'strain' not in name
    ]

# Define separate list where Massachusetts/18/2022 is reclassified as a
# 2023-circulating strain, i.e. it is removed from the vaccine-strain list so
# that filters excluding vaccine strains keep it in the plots
vaccine_strains_no_Massachusetts = [
    strain for strain in vaccine_strains if strain != 'A/Massachusetts/18/2022'
]

## Make toy NT50 plot for overview figure

In [5]:
# Plot settings for the toy figure
color_scheme = alt.Color('serum', title='serum', legend=None).scale(scheme='greys')
titer_range = [30, 16000]  # y-axis (NT50) domain
titleFontSize = 19
labelFontSize = 19
width = 200
height = 220

# The four viruses shown in the toy plot, in x-axis order
four_special_viruses = [
    'A/Krabi/THIS050/2023',
    'A/Ontario/RV00796/2023',
    'A/Bangkok/P3755/2023',
    'A/Massachusetts/18/2022',
]

# Conditional x-axis label colors: each of the four viruses gets its own
# palette color
four_special_viruses_labelColors = {
    'condition': [
        {'test': 'datum.label == "A/Massachusetts/18/2022"', 'value': color_palette[2]},
        {'test': 'datum.label == "A/Krabi/THIS050/2023"', 'value': color_palette[0]},
        {'test': 'datum.label == "A/Bangkok/P3755/2023"', 'value': color_palette[1]},
        {'test': 'datum.label == "A/Ontario/RV00796/2023"', 'value': color_palette[3]},
    ],
    'value': 'grey',  # fallback for labels that match none of the tests
}

# Titers of one example serum against the four highlighted viruses
example_serum_titers = SCH_titers.query('serum == "SCH23_y2009_s007"')
data = example_serum_titers[example_serum_titers['virus'].isin(four_special_viruses)]

# Axes for the line layer (the point layer reuses the same fields and sort)
toy_x_axis = alt.Axis(
    grid=False,
    titleFontSize=titleFontSize,
    labelFontSize=labelFontSize,
    labelColor=four_special_viruses_labelColors,
    labelFontWeight='bold',
    title=None,
    titleY=330,
    labelAlign='right',
    labelLimit=1000,
    labelAngle=310,
)
toy_y_axis = alt.Axis(
    grid=False,
    titleFontSize=titleFontSize,
    labelFontSize=labelFontSize,
    title='NT50',
)

# Line layer: titer profile across the four viruses on a log-scaled y-axis
line = (
    alt.Chart(data, width=width, height=height)
    .mark_line(size=5, point=False)
    .encode(
        alt.X('virus', axis=toy_x_axis, sort=four_special_viruses),
        alt.Y(
            'titer',
            scale=alt.Scale(type='log', domain=titer_range, nice=False),
            axis=toy_y_axis,
        ),
        color=color_scheme,
    )
)

# Point layer: black-outlined filled markers at each titer
points = (
    alt.Chart(data, width=width)
    .mark_point(size=120, stroke='black', strokeWidth=2, filled=True)
    .encode(
        alt.X('virus', sort=four_special_viruses),
        alt.Y('titer'),
        color=color_scheme,
    )
)

# Overlay the two layers
layered = (
    alt.layer(line, points)
    .properties(title='')
    .configure_title(fontSize=labelFontSize)
)

# Saving is currently disabled; the chart is only displayed
# outfile = './results/toy_nt50_plot.svg'
# layered.save(outfile, dpi = 600)
layered

## Make nice individual sera plots
Plot the full NT50 profile, one panel per serum, for selected individuals with particularly interesting neutralization profiles.

In [6]:
def plot_individual_sera(data):
    """Plot NT50 profiles with one stacked panel per serum.

    Each panel overlays a line and black-outlined points of ``titer`` against
    ``virus`` on a log-scaled y-axis, colored by ``serum``.

    Relies on the module-level ``virus_order`` (x-axis sort order) and
    ``color_palette`` (color range).

    Parameters
    ----------
    data : pandas.DataFrame
        Titers to plot; the chart encodes the ``virus``, ``titer`` and
        ``serum`` columns.

    Returns
    -------
    The layered chart faceted by ``serum`` in a single column.
    """
    # Shared styling
    font_size = 19  # used for axis titles, axis labels, chart title and facet headers
    panel_width = 1300
    panel_height = 140
    marker_size = 150
    nt50_domain = [40, 16000]
    serum_color = alt.Color('serum', title='serum', legend=None).scale(range=color_palette)

    virus_axis = alt.Axis(
        grid=False,
        titleFontSize=font_size,
        labelFontSize=font_size,
        title=None,
        titleY=330,
        labelAlign='right',
        labelLimit=1000,
    )
    nt50_axis = alt.Axis(
        grid=False,
        titleFontSize=font_size,
        labelFontSize=font_size,
        title='NT50',
    )

    # Line layer: carries the axis and scale configuration
    profile_lines = (
        alt.Chart(data, width=panel_width, height=panel_height)
        .mark_line(size=5, point=False)
        .encode(
            alt.X('virus', axis=virus_axis, sort=virus_order),
            alt.Y(
                'titer',
                scale=alt.Scale(type='log', domain=nt50_domain, nice=False),
                axis=nt50_axis,
            ),
            color=serum_color,
        )
    )

    # Point layer: one marker per titer, same x ordering as the lines
    profile_points = (
        alt.Chart(data, width=panel_width)
        .mark_point(size=marker_size, stroke='black', strokeWidth=2, filled=True)
        .encode(
            alt.X('virus', sort=virus_order),
            alt.Y('titer'),
            color=serum_color,
        )
    )

    # Overlay the layers, then stack one facet per serum in a single column
    return (
        alt.layer(profile_lines, profile_points)
        .facet('serum', columns=1)
        .properties(title='')
        .configure_title(fontSize=font_size)
        .configure_header(title=None, labelFontSize=font_size, labelFontWeight='bold')
    )


In [7]:
# Identify specific sera to plot and their age and sex
sera_to_plot_dict = {
    'SCH23_y2009_s007': '15 yo female',
    'PENN23_y1999_s048_d0': '24 yo female'
}

# Subset data to just circulating strains (vaccine strains are dropped, except
# A/Massachusetts/18/2022, which is not in the exclusion list)
data = all_titers[~all_titers['virus'].isin(vaccine_strains_no_Massachusetts)]

# Subset data to just sera of interest
data = data[data['serum'].isin(list(sera_to_plot_dict.keys()))]

# Replace serum IDs with more informative metadata (used as facet headers)
data = data.replace({'serum': sera_to_plot_dict})

# Produce final plot
plot = plot_individual_sera(data)

# Save the chart built in this cell. Previously `layered` (the toy plot from an
# earlier cell) was saved and displayed here instead of `plot`. The file is
# written into `resultsdir`, the output directory this notebook creates.
outfile = os.path.join(resultsdir, 'individual_example_profile.svg')
plot.save(outfile, dpi = 600)
plot

## Make individual line plots and median dot plots for US-based pediatric and adult pre-vaccination cohorts

In [8]:
# Map raw 'group_detail' values to the display names used in the plots.
# Penn timepoints are built as 'd' + <day>, so the post-vaccination key must be
# 'PennVaccineCohort_d28' (the former 'PennVaccineCohort_28' could never match).
group_detail_dict = {'SCH': 'Children',
                     'PennVaccineCohort_d0': 'Adults Pre-Vaccination',
                     'PennVaccineCohort_d28': 'Adults Post-Vaccination'}

# Preferred ordering of raw 'group_detail' values for colors and facet rows.
# NOTE(review): the Australian cohort is selected elsewhere in this notebook via
# group == 'AusVaccineCohort'; confirm that the 'MA22VaccineCohort_*' entries
# below actually match that cohort's 'group_detail' values.
group_sort_list = ['SCH', 'PennVaccineCohort_d0', 'PennVaccineCohort_d28', 'MA22VaccineCohort_prevax', 'MA22VaccineCohort_postvax',]


In [9]:
# Per-serum NT50 line profiles with the per-virus median titer overlaid as
# points, one facet row per cohort (children and pre-vaccination adults)

# (For presentations, a lighter blue '#557fc4' can be swapped in as the first
# entry of color_palette.)

# The plotted data has 'group_detail' relabeled through group_detail_dict, so
# the sort order must use the same display labels; otherwise none of the sort
# entries match the data and the custom ordering is silently ignored.
group_sort_labels = [group_detail_dict.get(group, group) for group in group_sort_list]

# Configure
color_scheme = alt.Color('group_detail',
                         title = 'Cohort',
                         sort = group_sort_labels,
                         legend = None
                        ).scale(range=color_palette)
titer_range = [30, 16000]
titleFontSize = 19
labelFontSize = 19
lineOpacity = 0.3
lineSize = 2.8
markerOpacity = 0.8
markerSize = 150
width = 1300
height = 220

# Get data: selected cohorts, vaccine strains removed (A/Massachusetts/18/2022
# is kept), cohort IDs replaced by display names
cohorts_to_plot = ['SCH', 'PennVaccineCohort_d0']
data = all_titers[all_titers['group_detail'].isin(cohorts_to_plot)]
data = (data[~data['virus'].isin(vaccine_strains_no_Massachusetts)]
        .replace({'group_detail': group_detail_dict})
       )

# One semi-transparent line per serum (detail='serum' splits the lines)
line = (alt.Chart(data, width = width, height = height)
        .mark_line(size = lineSize, point = False, opacity = lineOpacity)
        .encode(
            alt.X('virus',
                  axis = alt.Axis(grid=False, titleFontSize=titleFontSize, labelFontSize=labelFontSize,
                                  title = None, labelLimit = 1000, labelAlign = 'right'),
                  sort = virus_order
                 ),
            alt.Y('titer',
                  scale = alt.Scale(type='log', domain=titer_range, nice=False),
                  axis = alt.Axis(grid=False, titleFontSize=titleFontSize, labelFontSize=labelFontSize, title="NT50")
                 ),
            detail = 'serum',
            color = color_scheme,
        )
       )

# Median titer per virus, drawn as black-outlined filled markers
points = (alt.Chart(data, width=width)
          .mark_point(size = markerSize,
                      stroke = 'black',
                      strokeWidth = 2,
                      filled = True,
                      opacity = markerOpacity)
          .encode(
              alt.X('virus', sort = virus_order),
              alt.Y('median(titer)'),
              color = color_scheme
          )
         )

# Overlay the layers and facet into one row per cohort
layered = (alt.layer(line, points)
           .facet(
               row = alt.Row('group_detail:N', sort = group_sort_labels,
                             title = None,
                            ),
               config = alt.Config(
                        legend = alt.LegendConfig(titleFontSize=titleFontSize, labelFontSize = labelFontSize,
                            strokeColor='gray', padding=10, cornerRadius=10,
                            labelLimit = 1000 # Let legend labels be as long as they want
                        )
                       )
           ).configure_header(labelFontSize=labelFontSize,
                              labelFontWeight='bold',
                              labelOrient = 'top')
)

layered


## Australian vaccine cohort (received MA-22 vaccine)

In [10]:
# Per-serum NT50 line profiles with the per-virus median titer overlaid as
# points, one facet row per 'group_detail' value of the Australian cohort

# Palette (re-declared here with the same four colors as at the top of the notebook)
color_palette = [
    '#345995',  # blue
    '#03cea4',  # teal
    '#ca1551',  # red
    '#eac435',  # yellow
]

# Styling; only the last two palette colors (red, yellow) are used for this cohort.
# NOTE(review): group_sort_list lists 'MA22VaccineCohort_*' labels while the rows
# plotted here are selected with group == 'AusVaccineCohort' -- confirm that the
# sort entries match this cohort's 'group_detail' values.
color_scheme = alt.Color(
    'group_detail',
    title='Cohort',
    sort=group_sort_list,
    legend=None,
).scale(range=color_palette[2:])
titer_range = [30, 16000]
titleFontSize = 19
labelFontSize = 19
lineOpacity = 0.4
lineSize = 4
width = 1000
height = 240

# Get data: Australian cohort rows, vaccine strains removed
# (A/Massachusetts/18/2022 is kept)
cohorts_to_plot = ['AusVaccineCohort']
in_cohort = all_titers['group'].isin(cohorts_to_plot)
is_excluded_strain = all_titers['virus'].isin(vaccine_strains_no_Massachusetts)
data = all_titers[in_cohort & ~is_excluded_strain]

aus_x_axis = alt.Axis(
    grid=False,
    titleFontSize=titleFontSize,
    labelFontSize=labelFontSize,
    title=None,
    labelLimit=1000,
    labelAlign='right',
)
aus_y_axis = alt.Axis(
    grid=False,
    titleFontSize=titleFontSize,
    labelFontSize=labelFontSize,
    title="NT50",
)

# One semi-transparent line per serum, with the serum ID as tooltip; this
# layer is made interactive
line = (
    alt.Chart(data, width=width, height=height)
    .mark_line(size=lineSize, point=False, opacity=lineOpacity)
    .encode(
        alt.X('virus', axis=aus_x_axis, sort=virus_order),
        alt.Y(
            'titer',
            scale=alt.Scale(type='log', domain=titer_range, nice=False),
            axis=aus_y_axis,
        ),
        detail='serum',
        color=color_scheme,
        tooltip=['serum'],
    )
    .interactive()
)

# Median titer per virus, drawn as black-outlined filled markers
points = (
    alt.Chart(data, width=width)
    .mark_point(size=120, stroke='black', strokeWidth=2, filled=True)
    .encode(
        alt.X('virus', sort=virus_order),
        alt.Y('median(titer)'),
        color=color_scheme,
    )
)

# Legend styling passed through the facet's config
legend_config = alt.LegendConfig(
    titleFontSize=titleFontSize,
    labelFontSize=labelFontSize,
    strokeColor='gray',
    padding=10,
    cornerRadius=10,
    labelLimit=1000,  # let legend labels be as long as they want
)

# Overlay the layers and facet into one row per 'group_detail' value
layered = (
    alt.layer(line, points)
    .facet(
        row=alt.Row('group_detail:N', sort=group_sort_list, title=None),
        config=alt.Config(legend=legend_config),
    )
    .configure_header(
        labelFontSize=labelFontSize,
        labelFontWeight='bold',
        labelOrient='top',
    )
)

layered
