# Summarize titers across all sera and groups
Make summary plots for the titers from all sera.

In [3]:
import altair as alt

import neutcurve
import numpy as np
import pandas as pd
from scipy import stats

_ = alt.data_transformers.disable_max_rows()

Get variables from `snakemake`

In [5]:
#These are our input files. May want to add a file with the conditions so that we can plot by condition
input_titers = snakemake.input.input_titers
input_titers_old = snakemake.input.input_titers_old
viral_strain_plot_order = snakemake.input.viral_strain_plot_order
sample_metadata_file = snakemake.input.sample_metadata_file
HAI_titers = snakemake.input.HAI_titers_file

#These are the output files which are the data in html
titers_chart_html = snakemake.output.titers_chart_html
titers_chart_1xVax_html = snakemake.output.titers_chart_1xVax_html
titers_chart_2xVax_html = snakemake.output.titers_chart_2xVax_html
titers_chart_selectedpeople_html = snakemake.output.titers_chart_selectedpeople_html
titers_chart_median_w182_html = snakemake.output.titers_chart_median_w182_html
titers_chart_median_all_html = snakemake.output.titers_chart_median_all_html
titers_chart_foldchange_html = snakemake.output.titers_chart_foldchange_html
titers_chart_splitbygroup_w182_html = snakemake.output.titers_chart_splitbygroup_w182_html
titers_chart_splitbygroup_all_html = snakemake.output.titers_chart_splitbygroup_all_html
titers_chart_selectedserum_rotated_html = snakemake.output.titers_chart_selectedserum_rotated_html
titers_chart_serawithgap_rotated_html = snakemake.output.titers_chart_serawithgap_rotated_html

In [6]:
#Read in titer csv as a dataframe and get the order to plot the viruses, add metadata to sample file
titers = pd.read_csv(input_titers)
HAI_titers = pd.read_csv(HAI_titers)
sample_metadata = pd.read_csv(sample_metadata_file)
viruses_to_plot = pd.read_csv(viral_strain_plot_order)
assert len(titers) == len(titers.groupby(["serum", "virus"]))
viruses = viruses_to_plot.strain.tolist()
titers_withgroups = titers.merge(sample_metadata, on= "serum")
titers_withgroups['participant'] = "Participant " + titers_withgroups['individual']

In [13]:
titers_withgroups['timepoint'] = titers_withgroups['serum'].str[7:].astype(int)
titers_withgroups['pID'] = titers_withgroups['serum'].str[:6]
titers_withgroups = titers_withgroups.rename(columns={"group_y":"group","group_x":"cohort"})

titers_withgroups_vaccineonly = titers_withgroups.loc[titers_withgroups['virus'].str.contains("A/Hawaii/70/2019")]
titers_withgroups_vaccine = titers_withgroups_vaccineonly.rename(columns={"titer":"NGS_titer"})
HAI_titers = HAI_titers.rename(columns={"titer":"HAI_titer","strain":"virus"})

In [14]:
#Make a dataframe with both HAI and NGS neutralization assay titers, look at Pearson correlation
compare_HAI = titers_withgroups_vaccine.merge(HAI_titers, on=["pID","timepoint","virus"])
compare_HAI.corr(method= "pearson", numeric_only=True)

Unnamed: 0.1,NGS_titer,titer_sem,n_replicates,timepoint,Unnamed: 0,year,HAI_titer,log2_titer,fold_change
NGS_titer,1.0,0.918991,-0.117024,0.296169,0.005949,,0.925067,0.555113,0.885333
titer_sem,0.918991,1.0,-0.086751,0.275933,0.013987,,0.757808,0.523072,0.699681
n_replicates,-0.117024,-0.086751,1.0,-0.010605,-0.142883,,-0.0894,-0.022431,-0.119356
timepoint,0.296169,0.275933,-0.010605,1.0,0.01444,,0.380816,0.560875,
Unnamed: 0,0.005949,0.013987,-0.142883,0.01444,1.0,,0.050936,0.147444,-0.03443
year,,,,,,,,,
HAI_titer,0.925067,0.757808,-0.0894,0.380816,0.050936,,1.0,0.70185,0.949356
log2_titer,0.555113,0.523072,-0.022431,0.560875,0.147444,,0.70185,1.0,0.605129
fold_change,0.885333,0.699681,-0.119356,,-0.03443,,0.949356,0.605129,1.0


In [15]:
#Generate larger plot colored by day with both groups shown, but not indicated
alt.Chart(compare_HAI.loc[compare_HAI['virus'] =='A/Hawaii/70/2019']).mark_point(filled=True, size=75).encode(
    x=alt.X('NGS_titer', scale=alt.Scale(type="log", domain=[20,200000]),axis=alt.Axis(title="Sequencing-based NT50")),
    y=alt.Y('HAI_titer', scale=alt.Scale(type="log",domain=[2,2000]),axis=alt.Axis(title="HAI Titer")),
    color =alt.Color('timepoint:N', sort=['0','30'], title="days post vaccination",legend=alt.Legend(orient='right',labelLimit= 0)).scale(range = ['rebeccapurple', 'firebrick']),
).properties(width = 250, height = 250).configure_axis(grid=False, domain=False,labelFontSize=14,titleFontSize=20).configure_legend(titleAlign='left',labelLimit= 0,
titleFontSize=15,
labelFontSize=16
)

In [16]:
#Designate order for conditions to be plotted
condition_order = ['Day 0','Day 30','Day 182']

Plot all the titers:

In [51]:
#Here is a function that generates the interactive plot of titers by individual
def run_titerchart(dataframe, ncols=10):

    virus_selection = alt.selection_point(fields=["virus"], on="mouseover", empty=False)

    serum_selection = alt.selection_point(
        fields=["individual"],
        bind="legend",
        toggle="true",
    )

    sera = dataframe["individual"].unique().tolist()

    titers_chart = (
        alt.Chart(dataframe)
        .add_params(virus_selection, serum_selection)
        .transform_filter(serum_selection)
        .encode(
            alt.X(
                "titer",
                title="neutralization titer",
                scale=alt.Scale(nice=False, padding=4, type="log", domain=[20,200000]),
                axis=alt.Axis(labelOverlap=True),
            ),
            alt.Y("virus", sort=viruses),
            alt.Color("condition", sort=condition_order, title="Timepoint",scale=alt.Scale(range=['rebeccapurple', 'firebrick', 'mediumseagreen'])),
            alt.Facet(
                "participant:N",
                header=alt.Header(
                    title=None, labelFontSize=11, labelFontStyle="bold", labelPadding=0
                ),
                spacing=3,
                columns=ncols,
            ),
            alt.StrokeWidth(
                "individual:N",
                scale=alt.Scale(domain=sera, range=[1] * len(sera)),
                legend=alt.Legend(
                    orient="bottom",
                    columns=ncols,
                    symbolLimit=0,
                    symbolFillColor="black",
                    title="participant (click to select)",
                ),
            ),
            tooltip=[
                alt.Tooltip(c, format=".3g") if titers[c].dtype == float else c
                for c in titers.columns
            ],
        )
        .mark_line(point=True)
        .configure_axis(grid=False)
        .configure_point(size=45)
        .properties(
            height=alt.Step(9),
            width=110,
            title=alt.TitleParams(
                "Interactive chart of serum neutralization titers",
                subtitle="Mouseover points for details, click serum legend at bottom to select sera to show",
                fontSize=15,
                dx=100,
                dy=-5,
            ),
            autosize=alt.AutoSizeParams(resize=True),
        )
    )

    return titers_chart

In [52]:
def run_titerchart_rot(dataframe, ncols=10):

    virus_selection = alt.selection_point(fields=["virus"], on="mouseover", empty=False)

    serum_selection = alt.selection_point(
        fields=["individual"],
        bind="legend",
        toggle="true",
    )

    sera = dataframe["individual"].unique().tolist()

    titers_chart = (
        alt.Chart(dataframe)
        .add_params(virus_selection, serum_selection)
        .transform_filter(serum_selection)
        .encode(
            alt.Y(
                "titer",
                title="neutralization titer",
                scale=alt.Scale(nice=False, padding=4, type="log"),
                axis=alt.Axis(labelOverlap=True),
            ),
            alt.X("virus", sort=viruses),
            alt.Color("condition", sort=condition_order, title="Timepoint",scale=alt.Scale(range=['rebeccapurple', 'firebrick', 'mediumseagreen'])),
            alt.Facet(
                "participant:N",
                header=alt.Header(
                    title=None, labelFontSize=11, labelFontStyle="bold", labelPadding=0
                ),
                spacing=3,
                columns=ncols,
            ),
            alt.StrokeWidth(
                "individual:N",
                scale=alt.Scale(domain=sera, range=[1] * len(sera)),
                legend=alt.Legend(
                    orient="bottom",
                    columns=ncols,
                    symbolLimit=0,
                    symbolFillColor="black",
                    title="participant (click to select)",
                ),
            ),
            tooltip=[
                alt.Tooltip(c, format=".3g") if titers[c].dtype == float else c
                for c in titers.columns
            ],
        )
        .mark_line(point=True)
        .configure_axis(grid=False)
        .configure_point(size=45)
        .properties(
            width=alt.Step(11),
            height=80,
            title=alt.TitleParams(
                "Interactive chart of serum neutralization titers",
                subtitle="Mouseover points for details, click serum legend at bottom to select sera to show",
                fontSize=15,
                dx=100,
                dy=-5,
            ),
            autosize=alt.AutoSizeParams(resize=True),
        )
    )

    return titers_chart

In [53]:
titers_withgroups

Unnamed: 0,cohort,serum,virus,titer,titer_bound,titer_sem,n_replicates,titer_as,individual,condition,group,participant,timepoint,pID
0,DRIVE,D10002d0,A/Bangladesh/2221/2021,123.8,interpolated,13.99,3,midpoint,D10002,Day 0,1xVax,Participant D10002,0,D10002
1,DRIVE,D10002d0,A/Bangladesh/3210810034/2021,364.7,interpolated,69.64,2,midpoint,D10002,Day 0,1xVax,Participant D10002,0,D10002
2,DRIVE,D10002d0,A/Bangladesh/8002/2021,171.0,interpolated,11.16,3,midpoint,D10002,Day 0,1xVax,Participant D10002,0,D10002
3,DRIVE,D10002d0,A/Bangladesh/8036/2021,282.6,interpolated,15.07,3,midpoint,D10002,Day 0,1xVax,Participant D10002,0,D10002
4,DRIVE,D10002d0,A/Belgium/H0017/2022,241.7,interpolated,16.41,3,midpoint,D10002,Day 0,1xVax,Participant D10002,0,D10002
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2911,DRIVE,D10426d30,A/Togo/0304/2021,13870.0,interpolated,1124.00,3,midpoint,D10426,Day 30,2xVax,Participant D10426,30,D10426
2912,DRIVE,D10426d30,A/Togo/845/2020,11700.0,interpolated,325.80,3,midpoint,D10426,Day 30,2xVax,Participant D10426,30,D10426
2913,DRIVE,D10426d30,A/Utah/27/2022,5596.0,interpolated,1034.00,3,midpoint,D10426,Day 30,2xVax,Participant D10426,30,D10426
2914,DRIVE,D10426d30,A/Washington/23/2020,6128.0,interpolated,2266.00,3,midpoint,D10426,Day 30,2xVax,Participant D10426,30,D10426


In [54]:
sera_withgap = ['D10175','D10193','D10244']
chart = run_titerchart_rot(titers_withgroups.loc[titers_withgroups["individual"].isin(sera_withgap)], 1)
selected_sera2 = ['D10066','D10011','D10366']

#selected_sera = ['D10396','D10191','D10256','D10041','D10042','D10159']
for df, chart_html, ncols in [
    (titers_withgroups.loc[titers_withgroups["individual"].isin(selected_sera2)], titers_chart_serawithgap_rotated_html, 1),
    (titers_withgroups.loc[titers_withgroups["individual"].isin(sera_withgap)], titers_chart_selectedserum_rotated_html, 1),
]:
    print("\n\n*********************************")
    chart = run_titerchart_rot(df, ncols)
    display(chart)
    print(f"Saving to {chart_html}")
    chart.save(chart_html)



*********************************


Saving to results/plots_for_paper/titers_serawithgap_rotated.html


*********************************


Saving to results/plots_for_paper/titers_selectedserum_rotated.html


In [55]:
#Assign location for plot to be saved to and run chart
selected_sera = ['D10181','D10378','D10366']
selected_sera2 = ['D10066','D10011','D10366']
#selected_sera = ['D10396','D10191','D10256','D10041','D10042','D10159']
for df, chart_html, ncols in [
    (titers_withgroups, titers_chart_html, 10),
    (titers_withgroups.query("group == '1xVax'"), titers_chart_1xVax_html, 5),
    (titers_withgroups.query("group == '2xVax'"), titers_chart_2xVax_html, 5),
    (titers_withgroups.loc[titers_withgroups["individual"].isin(selected_sera)], titers_chart_selectedpeople_html, 3),
]:

    print("\n\n*********************************")
    chart = run_titerchart(df, ncols)
    display(chart)
    print(f"Saving to {chart_html}")
    chart.save(chart_html)



*********************************


Saving to results/plots_for_paper/titers_by_day.html


*********************************


Saving to results/plots_for_paper/titers_1xVax.html


*********************************


Saving to results/plots_for_paper/titers_2xVax.html


*********************************


Saving to results/plots_for_paper/titers_selectedpeople.html


In [38]:
#Pivot table to calculate fold-change make dataframes both with and without individuals who do not have a day 182 sample
NT50s_by_strain = titers_withgroups.pivot_table('titer', ['individual','virus','group'],'condition').reset_index()

NT50s_by_strain['foldchange_d30'] = NT50s_by_strain['Day 30']/NT50s_by_strain['Day 0']
NT50s_by_strain['foldchange_d182'] = NT50s_by_strain['Day 182']/NT50s_by_strain['Day 0']
NT50s_by_strain_w182 = NT50s_by_strain.dropna()
NT50s_by_strain_withmedian_w182 = NT50s_by_strain_w182.drop(columns='individual').groupby(['virus','group']).median().reset_index()

In [56]:
#Make a dataframe that contains only individuals who have a day 182 sample
no182 = NT50s_by_strain.loc[NT50s_by_strain['Day 182'].isna()]['individual'].unique().tolist()
titers_withgroups_w182 = titers_withgroups.loc[~titers_withgroups['individual'].isin(no182)]

['D10002',
 'D10107',
 'D10220',
 'D10241',
 'D10253',
 'D10256',
 'D10291',
 'D10396',
 'D10426']

In [40]:
#calculate the median for each virus group for all samples and for just those that have a day 182 sample
titers_withgroup_calculatemedian = titers_withgroups.drop(columns=['serum','individual','titer_sem','n_replicates','titer_bound','titer_as'])
titers_withgroup_calculatemedian = titers_withgroup_calculatemedian.groupby(['virus','group','condition']).median(numeric_only=True).reset_index().rename(columns={'titer':'median_titer'})
titers_withgroupdayandmedian = titers_withgroups.merge(titers_withgroup_calculatemedian, on=['virus','group','condition'])
titers_withgroupdayandmedian_allday30 = titers_withgroupdayandmedian.loc[~titers_withgroupdayandmedian['condition'].str.contains('182')]

titers_withgroup_calculatemedian_w182 = titers_withgroups_w182.drop(columns=['serum','individual','titer_sem','n_replicates','titer_bound','titer_as'])
titers_withgroup_calculatemedian_w182 = titers_withgroup_calculatemedian_w182.groupby(['virus','group','condition']).median(numeric_only=True).reset_index().rename(columns={'titer':'median_titer'})
titers_withgroup_calculatemedian_w182 = titers_withgroups_w182.merge(titers_withgroup_calculatemedian_w182, on=['virus','group','condition'])

In [41]:
#Run chart for calculating fold change
source = NT50s_by_strain_w182
median_source = NT50s_by_strain_withmedian_w182
range_ = ['#7b4173', '#e45756']
range_ = ['steelblue','goldenrod']

domain_ = ['1xVax','2xVax']
plot_range = [0.5,120]

day_to_plot = "Day 30"

NT50s_forselections_chart = alt.Chart(source).mark_errorband(opacity=0.2,extent="iqr",).encode(
    y=alt.X('foldchange_d30',scale=alt.Scale(domain=plot_range, type="log",nice=False),axis=alt.Axis(grid=False,titleFontSize=12, labelFontSize=12)),
    x=alt.Y("virus", sort=viruses, axis=alt.Axis(title=None,
                                                 labelFontSize=11,labelLimit=300)),
    color=alt.Color("group", title="group",scale=alt.
                    Scale(domain=domain_, range=range_)),
).properties(
    height=80,
    width = alt.Step(11))
meanline_chart = alt.Chart(median_source).mark_line(point=True,strokeWidth=1).encode(
    y=alt.X('foldchange_d30',scale=alt.Scale(domain=plot_range,type="log", nice=False)),
    x=alt.Y("virus", sort=viruses,
            title="virus"),
    color=alt.Color("group",title="group",scale=alt.
                    Scale(domain=domain_, range=range_)),
).properties(
    height=80,
    width = alt.Step(11))
meanline_chart_point = alt.Chart(median_source).mark_point(filled=True).encode(
    y=alt.X("foldchange_d30",scale=alt.Scale(domain=plot_range,type="log", nice=False)),
    x=alt.Y("virus", sort=viruses,
            title="virus"),
    color=alt.Color("group",title="group",scale=alt.
                    Scale(domain=domain_, range=range_)),
    size=alt.value(45),
    opacity=alt.value(1),
).properties(
    height=80,
    width = alt.Step(11))

chart =meanline_chart_point + NT50s_forselections_chart + meanline_chart
chart.layer[0].encoding.x.title = ' titer'
chart.layer[0].title = 'Fold-Change in NT50 with Vaccination'
chart.properties(
    width=80,
    height = alt.Step(11))
chart.save(titers_chart_foldchange_html)
chart

In [42]:
#Run chart for calculating fold change at day 182
NT50s_forselections_chart = alt.Chart(source).mark_errorband(opacity=0.2,extent="iqr",).encode(
    y=alt.X('foldchange_d182',scale=alt.Scale(type='log',domain=plot_range, nice=False),axis=alt.Axis(grid=False,titleFontSize=12, labelFontSize=12)),
    x=alt.Y("virus", sort=viruses, axis=alt.Axis(title=None,
                                                 labelFontSize=11,labelLimit=300)),
    color=alt.Color("group", title="group",scale=alt.
                    Scale(domain=domain_, range=range_)),
).properties(
    height=80,
    width = alt.Step(11))
meanline_chart = alt.Chart(median_source).mark_line(point=True,strokeWidth=1).encode(
    y=alt.X('foldchange_d182',scale=alt.Scale(type='log',domain=plot_range, nice=False)),
    x=alt.Y("virus", sort=viruses,
            title="virus"),
    color=alt.Color("group",title="group",scale=alt.
                    Scale(domain=domain_, range=range_)),
).properties(
    height=80,
    width = alt.Step(11))
meanline_chart_point = alt.Chart(median_source).mark_point(filled=True).encode(
    y=alt.X("foldchange_d182",scale=alt.Scale(type='log',domain=plot_range, nice=False)),
    x=alt.Y("virus", sort=viruses,
            title="virus"),
    color=alt.Color("group",title="group",scale=alt.
                    Scale(domain=domain_, range=range_)),
    size=alt.value(45),
    opacity=alt.value(1),
).properties(
    height=80,
    width = alt.Step(11))

chart =meanline_chart_point + NT50s_forselections_chart + meanline_chart
chart.layer[0].encoding.x.title = ' titer'
chart.layer[0].title = 'Fold-Change in NT50 with Vaccination'
chart.properties(
    width=80,
    height = alt.Step(11))
#chart.save(titers_chart_foldchange_html)
chart

In [43]:
domain_ = ['1xVax','2xVax']
plot_range = [60,30000]
NT50s_forselections_chart = alt.Chart().mark_errorband(opacity=0.2,extent="iqr",).encode(
    alt.Y("titer",scale=alt.Scale(type='log',domain=plot_range, nice=False),axis=alt.Axis(grid=False,titleFontSize=14, labelFontSize=14, title="titer")),
    alt.X("virus", sort=viruses, axis=alt.Axis(title=None,
                                                 labelFontSize=11,labelLimit=300)),
    alt.Color("group", title="group",scale=alt.
                    Scale(domain=domain_, range=range_)),
).properties(
    height=80,
    width = alt.Step(11))
meanline_chart = alt.Chart().mark_line(point=False,strokeWidth=1).encode(
    y=alt.Y("median_titer",scale=alt.Scale(domain=plot_range, nice=False)),
    x=alt.X("virus", sort=viruses,
            title="virus"),
    color=alt.Color("group",title="group",scale=alt.
                    Scale(domain=domain_, range=range_)),
).properties(
    height=80,
    width = alt.Step(11))
meanline_chart_point = alt.Chart().mark_point(filled=True).encode(
    y=alt.Y("median_titer",scale=alt.Scale(domain=plot_range, nice=False)),
    x=alt.X("virus", sort=viruses,
            title="virus"),
    color=alt.Color("group",title="group",scale=alt.
                    Scale(domain=domain_, range=range_)),
    size=alt.value(45),
    opacity=alt.value(1),
).properties(
    height=80,
    width = alt.Step(11))


day_order = ['Day 0','Day 30','Day 182']
median_chart = alt.layer(meanline_chart_point,NT50s_forselections_chart, meanline_chart, data = titers_withgroupdayandmedian_allday30).facet('condition:N', columns=1)
median_chart.save(titers_chart_median_all_html)
display(median_chart)

In [44]:
domain_ = ['1xVax','2xVax']
plot_range = [60,30000]
NT50s_forselections_chart = alt.Chart().mark_errorband(opacity=0.2,extent="iqr",).encode(
    alt.Y("titer",scale=alt.Scale(type='log',domain=plot_range, nice=False),axis=alt.Axis(grid=False,titleFontSize=14, labelFontSize=14, title="titer")),
    alt.X("virus", sort=viruses, axis=alt.Axis(title=None,
                                                 labelFontSize=11,labelLimit=300)),
    alt.Color("group", title="group",scale=alt.
                    Scale(domain=domain_, range=range_)),
).properties(
    height=80,
    width = alt.Step(11))
meanline_chart = alt.Chart().mark_line(point=False,strokeWidth=1).encode(
    y=alt.Y("median_titer",scale=alt.Scale(domain=plot_range, nice=False)),
    x=alt.X("virus", sort=viruses,
            title="virus"),
    color=alt.Color("group",title="group",scale=alt.
                    Scale(domain=domain_, range=range_)),
).properties(
    height=80,
    width = alt.Step(11))
meanline_chart_point = alt.Chart().mark_point(filled=True).encode(
    y=alt.Y("median_titer",scale=alt.Scale(domain=plot_range, nice=False)),
    x=alt.X("virus", sort=viruses,
            title="virus"),
    color=alt.Color("group",title="group",scale=alt.
                    Scale(domain=domain_, range=range_)),
    size=alt.value(45),
    opacity=alt.value(1),
).properties(
    height=80,
    width = alt.Step(11))


day_order = ['Day 0','Day 30','Day 182']
median_chart = alt.layer(meanline_chart_point,NT50s_forselections_chart, meanline_chart, data = titers_withgroup_calculatemedian_w182).facet('condition:N', columns=1)
median_chart.save(titers_chart_median_w182_html)
display(median_chart)

In [45]:
#Make plot with all timepoints for all samples that have a Day 182 sample, facet by group
range_ = ['rebeccapurple', 'firebrick', 'mediumseagreen']
domain_ = ['Day 0','Day 30','Day 182']
plot_range = [60,30000]

NT50s_forselections_chart = alt.Chart().mark_errorband(opacity=0.2,extent="iqr",).encode(
    alt.Y("titer",scale=alt.Scale(type='log',domain=plot_range, nice=False),axis=alt.Axis(grid=False,titleFontSize=14, labelFontSize=14, title="titer")),
    alt.X("virus", sort=viruses, axis=alt.Axis(title=None,
                                                 labelFontSize=11,labelLimit=300)),
    alt.Color("condition", title="day",scale=alt.
                    Scale(domain=domain_, range=range_)),
).properties(
    height=80,
    width = alt.Step(11))
meanline_chart = alt.Chart().mark_line(point=False,strokeWidth=1).encode(
    y=alt.X("median_titer",scale=alt.Scale(domain=plot_range, nice=False)),
    x=alt.Y("virus", sort=viruses,
            title="virus"),
    color=alt.Color("condition",title="day",scale=alt.
                    Scale(domain=domain_, range=range_)),
).properties(
    height=80,
    width = alt.Step(11))
meanline_chart_point = alt.Chart().mark_point(filled=True).encode(
    y=alt.X("median_titer",scale=alt.Scale(domain=plot_range, nice=False)),
    x=alt.Y("virus", sort=viruses,
            title="virus"),
    color=alt.Color("condition",title="day",scale=alt.
                    Scale(domain=domain_, range=range_)),
    size=alt.value(45),
    opacity=alt.value(1),
).properties(
    height=80,
    width = alt.Step(11))



median_chart = alt.layer(meanline_chart_point,NT50s_forselections_chart, meanline_chart, data = titers_withgroup_calculatemedian_w182).facet(column=alt.Column('group', header=alt.Header(title=None, labelFontSize=12, labelFontStyle="bold", labelPadding=0)))
median_chart.save(titers_chart_splitbygroup_w182_html)
display(median_chart)

In [46]:
#Make plot with all timepoints for all samples that have a Day 182 sample, facet by group

range_ = ['rebeccapurple', 'firebrick', 'mediumseagreen']
domain_ = ['Day 0','Day 30','Day 182']
plot_range = [60,30000]

NT50s_forselections_chart = alt.Chart().mark_errorband(opacity=0.2,extent="iqr",).encode(
    alt.Y("titer",scale=alt.Scale(type='log',domain=plot_range, nice=False),axis=alt.Axis(grid=False,titleFontSize=14, labelFontSize=14, title="titer")),
    alt.X("virus", sort=viruses, axis=alt.Axis(title=None,
                                                 labelFontSize=11,labelLimit=300)),
    alt.Color("condition", title="day",scale=alt.
                    Scale(domain=domain_, range=range_)),
).properties(
    height=80,
    width = alt.Step(11))
meanline_chart = alt.Chart().mark_line(point=False,strokeWidth=1).encode(
    y=alt.X("median_titer",scale=alt.Scale(domain=plot_range, nice=False)),
    x=alt.Y("virus", sort=viruses,
            title="virus"),
    color=alt.Color("condition",title="day",scale=alt.
                    Scale(domain=domain_, range=range_)),
).properties(
    height=80,
    width = alt.Step(11))
meanline_chart_point = alt.Chart().mark_point(filled=True).encode(
    y=alt.X("median_titer",scale=alt.Scale(domain=plot_range, nice=False)),
    x=alt.Y("virus", sort=viruses,
            title="virus"),
    color=alt.Color("condition",title="day",scale=alt.
                    Scale(domain=domain_, range=range_)),
    size=alt.value(45),
    opacity=alt.value(1),
).properties(
    height=80,
    width = alt.Step(11))



median_chart = alt.layer(meanline_chart_point,NT50s_forselections_chart, meanline_chart, data = titers_withgroupdayandmedian_allday30).facet(column=alt.Column('group', header=alt.Header(title=None, labelFontSize=12, labelFontStyle="bold", labelPadding=0)))
median_chart.save(titers_chart_splitbygroup_all_html)
display(median_chart)

In [47]:
stats.ttest_ind(titers_withgroups_w182.query("group == '1xVax'").query("condition == 'Day 0'")['titer'], titers_withgroups_w182.query("group == '2xVax'").query("condition == 'Day 0'")['titer'], equal_var = False)

TtestResult(statistic=-10.835545710131294, pvalue=3.69068854172297e-24, df=399.582462008698)

In [48]:
for virus in viruses:
    titers_totest = titers_withgroups_w182.loc[titers_withgroups['virus'] == virus]
    ttest = stats.mannwhitneyu(titers_withgroups_w182.query("group == '1xVax'").query("condition == 'Day 0'")['titer'], titers_withgroups_w182.query("group == '2xVax'").query("condition == 'Day 0'")['titer'])
    if ttest[1] < 0.05:
        print(virus, ttest[1])

A/California/07/2009 8.386562967151912e-32
A/Michigan/45/2015 8.386562967151912e-32
A/Brisbane/02/2018 8.386562967151912e-32
A/Ghana/2080/2020 8.386562967151912e-32
A/Cote_DIvoire/1448/2021 8.386562967151912e-32
A/Togo/845/2020 8.386562967151912e-32
A/Togo/0274/2021 8.386562967151912e-32
A/Ghana/138/2020 8.386562967151912e-32
A/Hawaii/70/2019 8.386562967151912e-32
A/Niger/10217/2021 8.386562967151912e-32
A/SouthAfrica/R16462/2021 8.386562967151912e-32
A/Nimes/871/2021 8.386562967151912e-32
A/Belgium/H0038/2022 8.386562967151912e-32
A/Paris/30353/2021 8.386562967151912e-32
A/Paris/31196/2021 8.386562967151912e-32
A/Togo/0304/2021 8.386562967151912e-32
A/England/220200318/2022 8.386562967151912e-32
A/Belgium/H0017/2022 8.386562967151912e-32
A/Washington/23/2020 8.386562967151912e-32
A/Wisconsin/588/2019 8.386562967151912e-32
A/India-PUN-NIV328484/2021 8.386562967151912e-32
A/Norway/25089/2022 8.386562967151912e-32
A/SouthAfrica/R14850/2021 8.386562967151912e-32
A/India/Pun-NIV312851/2021

In [49]:
#Just look at individuals with a day 182 sample
print("Statistically significant Day 0 samples:")
for virus in viruses:
    titers_totest = titers_withgroups_w182.loc[~titers_withgroups_w182['individual'].isin(no182)].loc[titers_withgroups_w182['virus'] == virus]
    ttest = stats.ttest_ind(titers_totest.query("group == '1xVax'").query("condition == 'Day 0'")['titer'], titers_totest.query("group == '2xVax'").query("condition == 'Day 0'")['titer'], equal_var = False)
    if ttest[1] < 0.05:
        print(virus, ttest[1])
print("\n")
print("Statistically significant Day 30 samples:")
for virus in viruses:
    titers_totest = titers_withgroups_w182.loc[~titers_withgroups_w182['individual'].isin(no182)].loc[titers_withgroups_w182['virus'] == virus]
    ttest = stats.ttest_ind(titers_totest.query("group == '1xVax'").query("condition == 'Day 30'")['titer'], titers_totest.query("group == '2xVax'").query("condition == 'Day 30'")['titer'], equal_var = False)
    if ttest[1] < 0.05:
        print(virus, ttest[1])
print("\n")
print("Statistically significant Day 182 samples:")
for virus in viruses:
    titers_totest = titers_withgroups_w182.loc[~titers_withgroups_w182['individual'].isin(no182)].loc[titers_withgroups_w182['virus'] == virus]
    ttest = stats.ttest_ind(titers_totest.query("group == '1xVax'").query("condition == 'Day 182'")['titer'], titers_totest.query("group == '2xVax'").query("condition == 'Day 182'")['titer'], equal_var = False)
    if ttest[1] < 0.05:
        print(virus, ttest[1])

Statistically significant Day 0 samples:
A/California/07/2009 0.02649380944083279
A/Michigan/45/2015 0.021840856160849662
A/Brisbane/02/2018 0.03995272087714563
A/Ghana/2080/2020 0.047331390477300804
A/Cote_DIvoire/1448/2021 0.025923839791756297
A/Togo/845/2020 0.013077978444747751
A/Togo/0274/2021 0.048016277188428526
A/Ghana/138/2020 0.042427057861893186
A/Hawaii/70/2019 0.04951253264475823
A/Niger/10217/2021 0.0267294270910931
A/SouthAfrica/R16462/2021 0.02116149868840774
A/Nimes/871/2021 0.02445568876209847
A/Belgium/H0038/2022 0.023339997359826035
A/Paris/30353/2021 0.021761867356808892
A/Paris/31196/2021 0.014097035977156941
A/Togo/0304/2021 0.03899340955961556
A/England/220200318/2022 0.008980493548324904
A/Belgium/H0017/2022 0.01147556303657342
A/Washington/23/2020 0.022400917237939254
A/Wisconsin/588/2019 0.019132880380797225
A/SouthAfrica/R14850/2021 0.033256184352776816
A/India/Pun-NIV312851/2021 0.04291240775627828


Statistically significant Day 30 samples:
A/England/22020

In [50]:
print("Statistically significant Day 0 samples:")
for virus in viruses:
    titers_totest = titers_withgroups_w182.loc[~titers_withgroups_w182['individual'].isin(no182)].loc[titers_withgroups_w182['virus'] == virus]
    ttest = stats.median_test(titers_totest.query("group == '1xVax'").query("condition == 'Day 0'")['titer'], titers_totest.query("group == '2xVax'").query("condition == 'Day 0'")['titer'],ties="ignore")
    if ttest[1] < 0.05:
        print(virus, ttest[1])
print("\n")
print("Statistically significant Day 30 samples:")
for virus in viruses:
    titers_totest = titers_withgroups_w182.loc[~titers_withgroups_w182['individual'].isin(no182)].loc[titers_withgroups_w182['virus'] == virus]
    ttest = stats.median_test(titers_totest.query("group == '1xVax'").query("condition == 'Day 30'")['titer'], titers_totest.query("group == '2xVax'").query("condition == 'Day 30'")['titer'],ties="ignore")
    if ttest[1] < 0.05:
        print(virus, ttest[1])
print("\n")
print("Statistically significant Day 182 samples:")
for virus in viruses:
    titers_totest = titers_withgroups_w182.loc[~titers_withgroups_w182['individual'].isin(no182)].loc[titers_withgroups_w182['virus'] == virus]
    ttest = stats.median_test(titers_totest.query("group == '1xVax'").query("condition == 'Day 182'")['titer'], titers_totest.query("group == '2xVax'").query("condition == 'Day 182'")['titer'],ties="ignore")
    if ttest[1] < 0.05:
        print(virus, ttest[1])

Statistically significant Day 0 samples:
A/Togo/0274/2021 0.025347318677468325
A/Hawaii/70/2019 0.025347318677468325
A/Togo/0304/2021 0.025347318677468325
A/England/220200318/2022 0.00700094198944864
A/Belgium/H0017/2022 0.00700094198944864


Statistically significant Day 30 samples:
A/Paris/30353/2021 0.025347318677468325
A/England/220200318/2022 0.025347318677468325
A/Wisconsin/588/2019 0.00700094198944864


Statistically significant Day 182 samples:


In [35]:
print("Statistically significant Day 0 samples:")
for virus in viruses:
    titers_totest = titers_withgroups_w182.loc[titers_withgroups_w182['virus'] == virus]
    ttest = stats.mannwhitneyu(titers_totest.query("group == '1xVax'").query("condition == 'Day 0'")['titer'], titers_totest.query("group == '2xVax'").query("condition == 'Day 0'")['titer'])
    if ttest[1] < 0.05:
        print(virus, ttest)
print("\n")
print("Statistically significant Day 30 samples:")
for virus in viruses:
    titers_totest = titers_withgroups_w182.loc[titers_withgroups_w182['virus'] == virus]
    ttest = stats.mannwhitneyu(titers_totest.query("group == '1xVax'").query("condition == 'Day 30'")['titer'], titers_totest.query("group == '2xVax'").query("condition == 'Day 30'")['titer'])
    if ttest[1] < 0.05:
        print(virus, ttest)
print("\n")
print("Statistically significant Day 182 samples:")
for virus in viruses:
    titers_totest = titers_withgroups_w182.loc[titers_withgroups_w182['virus'] == virus]
    ttest = stats.mannwhitneyu(titers_totest.query("group == '1xVax'").query("condition == 'Day 182'")['titer'], titers_totest.query("group == '2xVax'").query("condition == 'Day 182'")['titer'])
    if ttest[1] < 0.05:
        print(virus, ttest)

Statistically significant Day 0 samples:
A/California/07/2009 MannwhitneyuResult(statistic=25.0, pvalue=0.037771348951416615)
A/Michigan/45/2015 MannwhitneyuResult(statistic=26.0, pvalue=0.044759294575572275)
A/Brisbane/02/2018 MannwhitneyuResult(statistic=21.0, pvalue=0.01832430775151698)
A/Ghana/2080/2020 MannwhitneyuResult(statistic=22.0, pvalue=0.02210370491556239)
A/Cote_DIvoire/1448/2021 MannwhitneyuResult(statistic=21.0, pvalue=0.01832430775151698)
A/Togo/845/2020 MannwhitneyuResult(statistic=22.0, pvalue=0.02210370491556239)
A/Togo/0274/2021 MannwhitneyuResult(statistic=24.0, pvalue=0.03173399375689655)
A/Ghana/138/2020 MannwhitneyuResult(statistic=24.0, pvalue=0.03173399375689655)
A/Hawaii/70/2019 MannwhitneyuResult(statistic=21.0, pvalue=0.01832430775151698)
A/SouthAfrica/R16462/2021 MannwhitneyuResult(statistic=22.0, pvalue=0.02210370491556239)
A/Nimes/871/2021 MannwhitneyuResult(statistic=25.0, pvalue=0.037771348951416615)
A/Belgium/H0038/2022 MannwhitneyuResult(statistic=2

In [36]:
print("Statistically significant Day 0 samples:")
for virus in viruses:
    titers_totest = titers_withgroups.loc[titers_withgroups['virus'] == virus]
    ttest = stats.mannwhitneyu(titers_totest.query("group == '1xVax'").query("condition == 'Day 0'")['titer'], titers_totest.query("group == '2xVax'").query("condition == 'Day 0'")['titer'])
    if ttest[1] < 0.05:
        print(virus, ttest)
print("\n")
print("Statistically significant Day 30 samples:")
for virus in viruses:
    titers_totest = titers_withgroups.loc[titers_withgroups['virus'] == virus]
    ttest = stats.mannwhitneyu(titers_totest.query("group == '1xVax'").query("condition == 'Day 30'")['titer'], titers_totest.query("group == '2xVax'").query("condition == 'Day 30'")['titer'])
    if ttest[1] < 0.05:
        print(virus, ttest)
print("\n")
print("Statistically significant Day 182 samples:")
for virus in viruses:
    titers_totest = titers_withgroups.loc[titers_withgroups['virus'] == virus]
    ttest = stats.mannwhitneyu(titers_totest.query("group == '1xVax'").query("condition == 'Day 182'")['titer'], titers_totest.query("group == '2xVax'").query("condition == 'Day 182'")['titer'])
    if ttest[1] < 0.05:
        print(virus, ttest)

Statistically significant Day 0 samples:
A/California/07/2009 MannwhitneyuResult(statistic=51.0, pvalue=0.01140098291155517)
A/Michigan/45/2015 MannwhitneyuResult(statistic=52.0, pvalue=0.012821684691391543)
A/Brisbane/02/2018 MannwhitneyuResult(statistic=42.0, pvalue=0.0036905851986807135)
A/Ghana/2080/2020 MannwhitneyuResult(statistic=41.0, pvalue=0.0032301435955866646)
A/Cote_DIvoire/1448/2021 MannwhitneyuResult(statistic=38.0, pvalue=0.0021450570245935504)
A/Togo/845/2020 MannwhitneyuResult(statistic=38.0, pvalue=0.0021450570245935504)
A/Togo/0274/2021 MannwhitneyuResult(statistic=40.0, pvalue=0.0028226386759533724)
A/Ghana/138/2020 MannwhitneyuResult(statistic=41.0, pvalue=0.0032301435955866646)
A/Hawaii/70/2019 MannwhitneyuResult(statistic=38.0, pvalue=0.0021450570245935504)
A/Niger/10217/2021 MannwhitneyuResult(statistic=49.0, pvalue=0.00897202481281498)
A/SouthAfrica/R16462/2021 MannwhitneyuResult(statistic=36.0, pvalue=0.001619713575230349)
A/Nimes/871/2021 MannwhitneyuResult(