In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import os

In [54]:
# Turn off pandas' chained-assignment (SettingWithCopy) warning for the whole
# session; later cells assign columns on `df`, which is built by selecting a
# subset of columns from the raw table.
pd.set_option("mode.chained_assignment", None)

Your summary table below is consistent with the G002 results, so it makes me think maybe there’s something to this decline after vaccination. I think it would be interesting to see if the magnitude drops over time as well. It would be nice to see the trajectory for each participant over time by taking the average response across the 95 glycans after truncating at 100, and then making spaghetti plots of those trajectories.

In [58]:
# Read the tab-delimited file at `filepath_isotypes` into a DataFrame.
# NOTE(review): the saved output of this cell shows pandas emitted a warning
# pointing at the read_csv line (probably a mixed-dtype DtypeWarning); consider
# passing an explicit `dtype=` for the affected column(s) -- TODO confirm.
filepath_isotypes = '/networks/cavd/VDCs/Schief/Schief_856-G002/SkinReactions/data/Glycan_array_Scripps/processed_data/DRAFT_CAVD_G002_Glycan_Microarray_data_processed_2024-10-16.txt'
df_glycan_isotypes = pd.read_csv(filepath_isotypes, sep="\t")

# Columns carried forward into the analysis frame.
usecols = ['sample_id',
           'isotype',
           'ptid',
           'study_week',
           'spot_name',
           'glycan_m_number',
           'background_subtraced_mean_signal']

# Take an explicit copy: later cells add and overwrite columns on `df`, and a
# bare column selection may still be tied to `df_glycan_isotypes`. Copying
# makes `df` an independent frame, so those assignments are unambiguous and do
# not depend on the chained-assignment warning being silenced.
df = df_glycan_isotypes[usecols].copy()

  df_glycan_isotypes = pd.read_csv(filepath_isotypes, sep="\t")


In [59]:
# Store sample identifiers as strings.
df["sample_id"] = df["sample_id"].astype(str)

In [60]:
def centered_mean(x):
    """Return the mean of ``x``, trimming the extremes when there is enough data.

    With fewer than six values the plain ``np.mean(x)`` is returned. With six
    or more values, the values are sorted and the single smallest and single
    largest are dropped before averaging.

    NOTE(review): the short-input branch calls ``np.mean`` on ``x`` as passed,
    while the trimmed branch averages the array returned by ``np.sort``; for
    pandas input the two branches may treat missing values differently --
    TODO confirm whether NaNs can occur here.
    """
    # Too few values to trim: average everything.
    if len(x) < 6:
        return np.mean(x)

    ordered = np.sort(x)
    # Drop the lowest and highest value, then average what is left.
    return np.mean(ordered[1:-1])

# Compute one centered mean per isotype / sample / glycan group and broadcast
# it back onto every row of that group.
replicate_keys = ['isotype', 'sample_id', 'glycan_m_number']
signal_column = 'background_subtraced_mean_signal'
grouped_signal = df.groupby(replicate_keys)[[signal_column]]
df['centered_mean'] = grouped_signal.transform(centered_mean)

# The raw signal is no longer needed; once it is dropped, rows that differ
# only in that column become exact duplicates and are collapsed.
df = df.drop(columns=signal_column)
df = df.drop_duplicates()

In [None]:
# Keep only rows whose centered mean is strictly above 100.
above_threshold = df['centered_mean'] > 100
df_truncated = df[above_threshold]

# Average the surviving rows per participant / isotype / study week.
avg_response = df_truncated.groupby(['ptid', 'isotype', 'study_week'])[['centered_mean']].mean()
avg_response = avg_response.reset_index()

# Numeric ordering of the study-week labels; labels missing from the mapping
# get NaN and are sorted last.
week_to_timept = {'Wk 0': 0, 'Wk 8': 1, 'Wk 10': 2}
avg_response['timept'] = avg_response['study_week'].map(week_to_timept)
avg_response = avg_response.sort_values(by='timept')

In [1]:
# Spaghetti plot of `avg_response`: one panel per isotype, one line per
# participant, log-scaled y-axis shared across the panels.
fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(12,3), sharey=True)

# Matplotlib places string x-values in order of first appearance, so plotting
# `study_week` directly lets the first participant drawn in a panel dictate
# the axis order (a participant with no 'Wk 0' row would push 'Wk 0' to the
# right of 'Wk 10'). Plot at explicit positions ordered by `timept` instead,
# and label the ticks with the week names.
week_order = (avg_response.drop_duplicates(subset='study_week')
              .sort_values(by='timept')
              .study_week.tolist())
week_position = {week: pos for pos, week in enumerate(week_order)}

isotypes = ['IgG','IgE','IgM']
for i in range(3):
    iso = isotypes[i]
    # Iterate over every participant in every panel, even when there are no
    # rows for this isotype, so a participant's slot in the colour cycle is
    # the same in all panels.
    for ppt in avg_response.ptid.unique():
        select = (avg_response.ptid==ppt) & (avg_response.isotype==iso)
        trajectory = avg_response.loc[select].sort_values(by='timept')
        x = trajectory.study_week.map(week_position).to_numpy(dtype=float)
        ax[i].plot(x, trajectory.centered_mean)
    ax[i].set_xticks(list(range(len(week_order))))
    ax[i].set_xticklabels(week_order)
    ax[i].set_title(f"{iso}")
    ax[i].set_yscale("log")

ax[0].set_ylabel("Centered mean of background-normalized\nsignal, avgd over glycans")

# Caption below the panels describing how the plotted values were derived.
txt="Discarded all data <= 100, then took average over remaining glycans per isotype/ppt/timept"
plt.figtext(0.5, -0.07, txt, wrap=True, horizontalalignment='center', fontsize=9)
plt.suptitle("Discarding data <= 100", y=1.03)
plt.show()

In [135]:
# Work on a copy of `df` and raise every centered mean below 100 up to 100.
df_floor = df.copy()
below_floor = df_floor['centered_mean'] < 100
df_floor.loc[below_floor, 'centered_mean'] = 100

# Average the floored values per participant / isotype / study week.
avg_response_floor = df_floor.groupby(['ptid', 'isotype', 'study_week'])[['centered_mean']].mean()
avg_response_floor = avg_response_floor.reset_index()

# Numeric ordering of the study-week labels; labels missing from the mapping
# get NaN and are sorted last.
week_to_timept = {'Wk 0': 0, 'Wk 8': 1, 'Wk 10': 2}
avg_response_floor['timept'] = avg_response_floor['study_week'].map(week_to_timept)
avg_response_floor = avg_response_floor.sort_values(by='timept')

In [2]:
# Spaghetti plot of `avg_response_floor`: one panel per isotype, one line per
# participant, log-scaled y-axis shared across the panels.
fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(12,3), sharey=True)

# Matplotlib places string x-values in order of first appearance, so plotting
# `study_week` directly lets the first participant drawn in a panel dictate
# the axis order (a participant missing an early week would misorder the
# axis). Plot at explicit positions ordered by `timept` instead, and label
# the ticks with the week names.
week_order = (avg_response_floor.drop_duplicates(subset='study_week')
              .sort_values(by='timept')
              .study_week.tolist())
week_position = {week: pos for pos, week in enumerate(week_order)}

isotypes = ['IgG','IgE','IgM']
for i in range(3):
    iso = isotypes[i]
    # Iterate over every participant in every panel, even when there are no
    # rows for this isotype, so a participant's slot in the colour cycle is
    # the same in all panels.
    for ppt in avg_response_floor.ptid.unique():
        select = (avg_response_floor.ptid==ppt) & (avg_response_floor.isotype==iso)
        trajectory = avg_response_floor.loc[select].sort_values(by='timept')
        x = trajectory.study_week.map(week_position).to_numpy(dtype=float)
        ax[i].plot(x, trajectory.centered_mean)
    ax[i].set_xticks(list(range(len(week_order))))
    ax[i].set_xticklabels(week_order)
    ax[i].set_title(f"{iso}")
    ax[i].set_yscale("log", base=10)

ax[0].set_ylabel("Centered mean of background-normalized\nsignal, avgd over glycans")

# Caption below the panels describing how the plotted values were derived.
txt="Floored all data at 100, then took average over glycans per isotype/ppt/timept."
plt.figtext(0.5, -0.07, txt, wrap=True, horizontalalignment='center', fontsize=9)
plt.suptitle("Floor at 100", y=1.03)
plt.show()

In [3]:
# Spaghetti plot of `avg_response_floor`: one panel per isotype, one line per
# participant, each panel with its own log-scaled y-axis.
fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(12,3))

# Matplotlib places string x-values in order of first appearance, so plotting
# `study_week` directly lets the first participant drawn in a panel dictate
# the axis order (a participant missing an early week would misorder the
# axis). Plot at explicit positions ordered by `timept` instead, and label
# the ticks with the week names.
week_order = (avg_response_floor.drop_duplicates(subset='study_week')
              .sort_values(by='timept')
              .study_week.tolist())
week_position = {week: pos for pos, week in enumerate(week_order)}

isotypes = ['IgG','IgE','IgM']
for i in range(3):
    iso = isotypes[i]
    # Iterate over every participant in every panel, even when there are no
    # rows for this isotype, so a participant's slot in the colour cycle is
    # the same in all panels.
    for ppt in avg_response_floor.ptid.unique():
        select = (avg_response_floor.ptid==ppt) & (avg_response_floor.isotype==iso)
        trajectory = avg_response_floor.loc[select].sort_values(by='timept')
        x = trajectory.study_week.map(week_position).to_numpy(dtype=float)
        ax[i].plot(x, trajectory.centered_mean)
    ax[i].set_xticks(list(range(len(week_order))))
    ax[i].set_xticklabels(week_order)
    ax[i].set_title(f"{iso}")
    ax[i].set_yscale("log", base=10)

ax[0].set_ylabel("Centered mean of background-normalized\nsignal, avgd over glycans")

# Caption below the panels describing how the plotted values were derived.
txt="Floored all data at 100, then took average over glycans per isotype/ppt/timept."
plt.figtext(0.5, -0.07, txt, wrap=True, horizontalalignment='center', fontsize=9)
plt.suptitle("Floor at 100", y=1.03)
plt.show()

In [137]:
# Distinct isotype / participant / spot combinations that appear in
# `df_truncated`.
responder_keys = ['isotype', 'ptid', 'spot_name']
responders = df_truncated[responder_keys].drop_duplicates()

# Pull every row of `df` (all study weeks) for those combinations.
df_truncate_conditional = responders.merge(df, on=responder_keys, how='left')

# Average per participant / isotype / study week.
avg_response_truncate_conditional = df_truncate_conditional.groupby(['ptid', 'isotype', 'study_week'])[['centered_mean']].mean()
avg_response_truncate_conditional = avg_response_truncate_conditional.reset_index()

# Numeric ordering of the study-week labels; labels missing from the mapping
# get NaN and are sorted last.
week_to_timept = {'Wk 0': 0, 'Wk 8': 1, 'Wk 10': 2}
avg_response_truncate_conditional['timept'] = avg_response_truncate_conditional['study_week'].map(week_to_timept)
avg_response_truncate_conditional = avg_response_truncate_conditional.sort_values(by='timept')

In [4]:
# Display copy of the conditionally-truncated averages. The per-group
# averages (not the underlying glycan values) are floored at 1 so that every
# point can be drawn on the log-scaled y-axis.
avg_response_truncate_conditional_A = avg_response_truncate_conditional.copy()
avg_response_truncate_conditional_A.loc[avg_response_truncate_conditional_A.centered_mean <= 1, 'centered_mean'] = 1

# Matplotlib places string x-values in order of first appearance, so plotting
# `study_week` directly lets the first participant drawn in a panel dictate
# the axis order (a participant missing an early week would misorder the
# axis). Plot at explicit positions ordered by `timept` instead, and label
# the ticks with the week names.
week_order = (avg_response_truncate_conditional_A.drop_duplicates(subset='study_week')
              .sort_values(by='timept')
              .study_week.tolist())
week_position = {week: pos for pos, week in enumerate(week_order)}

# One panel per isotype, one line per participant, shared log y-axis.
fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(12,3), sharey=True)

isotypes = ['IgG','IgE','IgM']
for i in range(3):
    iso = isotypes[i]
    # Iterate over every participant in every panel, even when there are no
    # rows for this isotype, so a participant's slot in the colour cycle is
    # the same in all panels.
    for ppt in avg_response_truncate_conditional_A.ptid.unique():
        select = (avg_response_truncate_conditional_A.ptid==ppt) & (avg_response_truncate_conditional_A.isotype==iso)
        trajectory = avg_response_truncate_conditional_A.loc[select].sort_values(by='timept')
        x = trajectory.study_week.map(week_position).to_numpy(dtype=float)
        ax[i].plot(x, trajectory.centered_mean)
    ax[i].set_xticks(list(range(len(week_order))))
    ax[i].set_xticklabels(week_order)
    ax[i].set_title(f"{iso}")
    ax[i].set_yscale("log")

ax[0].set_ylabel("Centered mean of background-normalized\nsignal, avgd over glycans")

# Caption states the steps in the order the code performs them: the average is
# taken first and the floor at 1 is applied to the averaged values.
# NOTE(review): if flooring the glycan-level data before averaging was
# intended, the computation (not this caption) needs to change -- confirm.
txt="Kept all timepts for any isotype/ppt/glycan that had at least one timept above 100, discarded the rest. Took the average over remaining\nglycans per isotype/ppt/timepoint, then floored the averages at 1 for the sake of displaying on a log scale"
plt.figtext(0.5, -0.08, txt, wrap=True, horizontalalignment='center', fontsize=9)
plt.suptitle("Conditionally truncated A", y=1.03)
plt.show()

In [5]:
# Display copy of the conditionally-truncated averages. The per-group
# averages (not the underlying glycan values) are floored at 1 so that every
# point can be drawn on the log-scaled y-axis.
avg_response_truncate_conditional_A = avg_response_truncate_conditional.copy()
avg_response_truncate_conditional_A.loc[avg_response_truncate_conditional_A.centered_mean <= 1, 'centered_mean'] = 1

# Matplotlib places string x-values in order of first appearance, so plotting
# `study_week` directly lets the first participant drawn in a panel dictate
# the axis order (a participant missing an early week would misorder the
# axis). Plot at explicit positions ordered by `timept` instead, and label
# the ticks with the week names.
week_order = (avg_response_truncate_conditional_A.drop_duplicates(subset='study_week')
              .sort_values(by='timept')
              .study_week.tolist())
week_position = {week: pos for pos, week in enumerate(week_order)}

# One panel per isotype, one line (with point markers) per participant; each
# panel has its own log-scaled y-axis.
fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(12,3))

isotypes = ['IgG','IgE','IgM']
for i in range(3):
    iso = isotypes[i]
    # Iterate over every participant in every panel, even when there are no
    # rows for this isotype, so a participant's slot in the colour cycle is
    # the same in all panels.
    for ppt in avg_response_truncate_conditional_A.ptid.unique():
        select = (avg_response_truncate_conditional_A.ptid==ppt) & (avg_response_truncate_conditional_A.isotype==iso)
        trajectory = avg_response_truncate_conditional_A.loc[select].sort_values(by='timept')
        x = trajectory.study_week.map(week_position).to_numpy(dtype=float)
        ax[i].plot(x, trajectory.centered_mean)
        ax[i].scatter(x, trajectory.centered_mean, s=6)
    ax[i].set_xticks(list(range(len(week_order))))
    ax[i].set_xticklabels(week_order)
    ax[i].set_title(f"{iso}")
    ax[i].set_yscale("log")

ax[0].set_ylabel("Centered mean of background-normalized\nsignal, avgd over glycans")

# Caption states the steps in the order the code performs them: the average is
# taken first and the floor at 1 is applied to the averaged values.
# NOTE(review): if flooring the glycan-level data before averaging was
# intended, the computation (not this caption) needs to change -- confirm.
txt="Kept all timepts for any isotype/ppt/glycan that had at least one timept above 100, discarded the rest. Took the average over remaining\nglycans per isotype/ppt/timepoint, then floored the averages at 1 for the sake of displaying on a log scale"
plt.figtext(0.5, -0.08, txt, wrap=True, horizontalalignment='center', fontsize=9)
plt.suptitle("Conditionally truncated A", y=1.03)
plt.show()

In [6]:
# Display copy of the conditionally-truncated averages with the per-group
# averages (not the underlying glycan values) floored at 100.
avg_response_truncate_conditional_B = avg_response_truncate_conditional.copy()
avg_response_truncate_conditional_B.loc[avg_response_truncate_conditional_B.centered_mean <= 100, 'centered_mean'] = 100

# Matplotlib places string x-values in order of first appearance, so plotting
# `study_week` directly lets the first participant drawn in a panel dictate
# the axis order (a participant missing an early week would misorder the
# axis). Plot at explicit positions ordered by `timept` instead, and label
# the ticks with the week names.
week_order = (avg_response_truncate_conditional_B.drop_duplicates(subset='study_week')
              .sort_values(by='timept')
              .study_week.tolist())
week_position = {week: pos for pos, week in enumerate(week_order)}

# One panel per isotype, one line per participant, shared log y-axis.
fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(12,3), sharey=True)

isotypes = ['IgG','IgE','IgM']
for i in range(3):
    iso = isotypes[i]
    # Iterate over every participant in every panel, even when there are no
    # rows for this isotype, so a participant's slot in the colour cycle is
    # the same in all panels.
    for ppt in avg_response_truncate_conditional_B.ptid.unique():
        select = (avg_response_truncate_conditional_B.ptid==ppt) & (avg_response_truncate_conditional_B.isotype==iso)
        trajectory = avg_response_truncate_conditional_B.loc[select].sort_values(by='timept')
        x = trajectory.study_week.map(week_position).to_numpy(dtype=float)
        ax[i].plot(x, trajectory.centered_mean)
    ax[i].set_xticks(list(range(len(week_order))))
    ax[i].set_xticklabels(week_order)
    ax[i].set_title(f"{iso}")
    ax[i].set_yscale("log")

ax[0].set_ylabel("Centered mean of background-normalized\nsignal, avgd over glycans")

# Caption states the steps in the order the code performs them: the average is
# taken first and the floor at 100 is applied to the averaged values.
# NOTE(review): if flooring the glycan-level data before averaging was
# intended, the computation (not this caption) needs to change -- confirm.
txt="Kept all timepts for any isotype/ppt/glycan that had at least one timept above 100, discarded the rest. Took the average over remaining\nglycans per isotype/ppt/timepoint, then floored the averages at 100"
plt.figtext(0.5, -0.08, txt, wrap=True, horizontalalignment='center', fontsize=9)
plt.suptitle("Conditionally truncated B", y=1.03)
plt.show()

In [7]:
# Display copy of the conditionally-truncated averages with the per-group
# averages (not the underlying glycan values) floored at 100.
avg_response_truncate_conditional_B = avg_response_truncate_conditional.copy()
avg_response_truncate_conditional_B.loc[avg_response_truncate_conditional_B.centered_mean <= 100, 'centered_mean'] = 100

# Matplotlib places string x-values in order of first appearance, so plotting
# `study_week` directly lets the first participant drawn in a panel dictate
# the axis order (a participant missing an early week would misorder the
# axis). Plot at explicit positions ordered by `timept` instead, and label
# the ticks with the week names.
week_order = (avg_response_truncate_conditional_B.drop_duplicates(subset='study_week')
              .sort_values(by='timept')
              .study_week.tolist())
week_position = {week: pos for pos, week in enumerate(week_order)}

# One panel per isotype, one line (with point markers) per participant; each
# panel has its own log-scaled y-axis.
fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(12,3))

isotypes = ['IgG','IgE','IgM']
for i in range(3):
    iso = isotypes[i]
    # Iterate over every participant in every panel, even when there are no
    # rows for this isotype, so a participant's slot in the colour cycle is
    # the same in all panels.
    for ppt in avg_response_truncate_conditional_B.ptid.unique():
        select = (avg_response_truncate_conditional_B.ptid==ppt) & (avg_response_truncate_conditional_B.isotype==iso)
        trajectory = avg_response_truncate_conditional_B.loc[select].sort_values(by='timept')
        x = trajectory.study_week.map(week_position).to_numpy(dtype=float)
        ax[i].plot(x, trajectory.centered_mean)
        ax[i].scatter(x, trajectory.centered_mean, s=6)
    ax[i].set_xticks(list(range(len(week_order))))
    ax[i].set_xticklabels(week_order)
    ax[i].set_title(f"{iso}")
    ax[i].set_yscale("log")

ax[0].set_ylabel("Centered mean of background-normalized\nsignal, avgd over glycans")

# Caption states the steps in the order the code performs them: the average is
# taken first and the floor at 100 is applied to the averaged values.
# NOTE(review): if flooring the glycan-level data before averaging was
# intended, the computation (not this caption) needs to change -- confirm.
txt="Kept all timepts for any isotype/ppt/glycan that had at least one timept above 100, discarded the rest. Took the average over remaining\nglycans per isotype/ppt/timepoint, then floored the averages at 100"
plt.figtext(0.5, -0.08, txt, wrap=True, horizontalalignment='center', fontsize=9)
plt.suptitle("Conditionally truncated B")
plt.tight_layout()
plt.show()