In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from PIL import Image
import os

import seaborn as sns

# Turn off pandas' chained-assignment check for the whole notebook, so
# assignments into frames derived from other frames emit no SettingWithCopyWarning.
pd.options.mode.chained_assignment = None

In [5]:
# Read the tab-separated, SDMC-processed glycan array export into a DataFrame.
glycan = pd.read_csv(
    "/trials/vaccine/p302/s001/qdata/LabData/AE_assays_pass-through/Glycan_array/processed_by_sdmc/HVTN302_Glycan_Data_Processed_2024-09-04.txt",
    sep="\t",
)

[0m[00;32mHVTN302_Glycan_Data_Processed_2024-09-04.txt[0m*          [01;34marchive[0m/
[00;32mHVTN302_glycan_microarray_sdmc_processing_notes.html[0m*


In [58]:
# Path of the SDMC-processed glycan array export (tab-separated text).
filepath_isotypes = (
    "/trials/vaccine/p302/s001/qdata/LabData/AE_assays_pass-through/Glycan_array/"
    "processed_by_sdmc/HVTN302_Glycan_Data_Processed_2024-09-04.txt"
)

# Load the full table, all columns.
df_glycan_isotypes = pd.read_csv(
    filepath_isotypes,
    sep="\t",
)

In [59]:
# Columns carried into the analysis.
# NOTE(review): 'background_subtraced_mean_signal' is spelled this way here;
# presumably it matches the header in the source file — do not "correct" it
# without checking the data.
usecols = ['guspec',
           'isotype',
           'ptid',
           'visitno',
           'spot_name',
           'glycan_m_number',
           'background_subtraced_mean_signal']

# Take an explicit copy: later cells assign new/converted columns on `df`, and
# an independent frame makes those writes unambiguous instead of relying on the
# SettingWithCopyWarning being silenced.
df = df_glycan_isotypes[usecols].copy()

In [60]:
# Store the guspec identifiers as strings.
df['guspec'] = df['guspec'].astype(str)

In [61]:
def centered_mean(x):
    """Mean of ``x`` with its extremes trimmed when enough values are present.

    With six or more values, the single smallest and single largest values
    are discarded before averaging; with fewer than six, the plain mean of
    all values is returned.
    """
    if len(x) < 6:
        return np.mean(x)
    # Sort ascending, then drop the first (min) and last (max) entries.
    trimmed = np.sort(x)[1:-1]
    return np.mean(trimmed)

# Give every row the centered mean of the signals sharing its
# (isotype, guspec, glycan_m_number) combination.
replicate_keys = ['isotype', 'guspec', 'glycan_m_number']
raw_signal = 'background_subtraced_mean_signal'
df['centered_mean'] = df.groupby(replicate_keys)[[raw_signal]].transform(centered_mean)

# Drop the per-measurement signal, remove rows that are now identical, and
# renumber the index from zero.
# NOTE(review): rows that still differ in another kept column (e.g. spot_name)
# remain separate rows — confirm this yields one row per sample/glycan.
df = (
    df.drop(columns=raw_signal)
      .drop_duplicates()
      .reset_index(drop=True)
)

In [151]:
# Per isotype and visit, count the rows whose centered mean is above 100.
above_cutoff = df[df['centered_mean'] > 100]
t = above_cutoff.groupby(['isotype', 'visitno'])[['guspec']].count()

In [153]:
# Relabel the column(s) of the summary table `t` in place.
# NOTE(review): if `t` holds a row count of 'guspec' per group, this is a number
# of rows rather than of distinct participants — confirm the label fits.
t.columns = ['responders_total']

In [83]:
# Build the response table on a copy so `df` itself is left unchanged.
calc_responses = df.copy()
calc_responses['threshold'] = 100
# A row counts as a response when its centered mean is strictly above 100.
calc_responses['response_flag'] = calc_responses['centered_mean'] > 100


def _fraction_flagged(flags):
    """Share of entries in ``flags`` that are True."""
    return flags.sum() / len(flags)


# Number and proportion of responses per isotype / visit / glycan.
visit_keys = ['isotype', 'visitno', 'glycan_m_number']
calc_responses["count_of_responses"] = (
    calc_responses.groupby(visit_keys)[['response_flag']].transform('sum')
)
calc_responses["prop_of_responses"] = (
    calc_responses.groupby(visit_keys)[['response_flag']].transform(_fraction_flagged)
)

# Proportion of responses per isotype / glycan across all visits; rows are then
# ordered from the highest to the lowest overall rate.
overall_keys = ['isotype', 'glycan_m_number']
calc_responses["overall_response_rate"] = (
    calc_responses.groupby(overall_keys)[['response_flag']].transform(_fraction_flagged)
)
calc_responses = calc_responses.sort_values('overall_response_rate', ascending=False)

In [15]:
# Directory that receives the PNG and PDF files written by the plotting cells.
# The trailing slash is required: file names are appended with plain string concatenation.
savedir = '/networks/vtn/lab/SDMC_labscience/operations/documents/templates/assay/template_testing/hvtn_glycan_plots/'

In [24]:
# Show the distinct visit numbers present in the data, in ascending order.
np.sort(calc_responses['visitno'].unique())

array([ 2.,  6.,  8., 10., 12.])

In [26]:
# Isotype names (not referenced by the plotting code shown in this notebook).
isotypes = ['IgE', 'IgM', 'IgG']

# Y-axis tick positions per isotype, looked up by the box-plot cells.
useticks = {
    "IgG": [100, 1_000, 10_000, 15_000],
    "IgE": [100, 1_000, 1_700],
    "IgM": [100, 1_000],
}

# Y-axis limits per isotype; only referenced from commented-out set_ylim calls.
ylim = {
    'IgG': (100, 15_000),
    'IgE': (100, 1_700),
    'IgM': (100, 1_000),
}

In [1]:
# ---- IgG: per-visit box plots of responder signal, 12 glycans per page, then one PDF ----
isotype = "IgG"
isotype_rows = calc_responses.loc[calc_responses.isotype == isotype]

# Glycans of this isotype in calc_responses' row order (that frame is sorted by
# descending overall_response_rate when it is built).
glycan_ordering = (
    isotype_rows[['glycan_m_number', 'overall_response_rate']]
    .drop_duplicates()
    .glycan_m_number
    .tolist()
)

# Largest single-visit responder count per glycan, and which glycans pass the cut.
# NOTE(review): `> 3` keeps glycans with at least 4 responders at some visit,
# while the figure title and file names say ">=3" / "min3" — confirm which is intended.
max_responders = isotype_rows.groupby(['isotype', 'glycan_m_number']).count_of_responses.max()
qualifies = max_responders > 3
K = int(qualifies.sum())  # the original note recorded K = 34

# Keep exactly the glycans that pass the cut, preserving the response-rate order.
# (Slicing the first K entries of the ordering would instead keep the K glycans
# with the highest overall rate, which is not necessarily the same set.)
selected_glycans = set(qualifies[qualifies].index.get_level_values('glycan_m_number'))
glycan_ordering = [name for name in glycan_ordering if name in selected_glycans]

N = 4  # panel rows per page
M = 3  # panel columns per page
visitnos = [2.,  6.,  8., 10., 12.]

t = 0  # page counter; also used in the PNG file names
remaining_glycans = glycan_ordering
while len(remaining_glycans) > 0:
    t += 1

    fig, ax = plt.subplots(nrows=N, ncols=M, figsize=(9, N*3.8), sharey=True)
    for k, panel in enumerate(ax.flat):  # row-major, i.e. k = row*M + col
        if k >= len(remaining_glycans):
            # Fewer than N*M glycans left on the last page: hide the unused panel.
            panel.set_axis_off()
            continue
        glycan_name = remaining_glycans[k]

        # Plot only rows flagged as responses, i.e. the same `> 100` rule that
        # count_of_responses is built from.
        g = calc_responses.loc[
            (calc_responses.glycan_m_number == glycan_name)
            & (calc_responses.isotype == isotype)
            & calc_responses['response_flag']
        ]
        data = [list(g.loc[g.visitno == v].centered_mean) for v in visitnos]
        tick_labels = [f"N={len(values)}\nV{v}" for values, v in zip(data, visitnos)]

        panel.boxplot(data)
        panel.set_xticks(ticks=np.arange(1, len(visitnos) + 1), labels=tick_labels)
        panel.set_yscale("log")
        panel.set_yticks(useticks[isotype])

        # Long glycan names are broken onto several lines at spaces.
        title = glycan_name.replace(' ', '\n') if len(glycan_name) > 20 else f"{glycan_name}"
        panel.set_title(title, fontsize=10)
        if k % M == 0:  # first column only
            panel.set_ylabel("centered mean of\nbackground-normalized signal")

    fig.tight_layout()
    fig.suptitle(f"{isotype} - glycans w/ >=3 responders", fontsize=14, y=1.005)
    fig.savefig(os.path.join(savedir, f"{isotype}_boxplots{t}_min3.png"), dpi=320, format='png', transparent=False, bbox_inches='tight', pad_inches=0.3)
    remaining_glycans = remaining_glycans[N*M:]

# Bundle exactly the pages written above (1..t) into one PDF.
imgs = [f"{isotype}_boxplots{page}_min3.png" for page in range(1, t + 1)]

images = [Image.open(os.path.join(savedir, f)) for f in imgs]

pdf_path = os.path.join(savedir, f"{isotype}_glycan_boxplots_min3.pdf")

try:
    if images:  # no pages were produced when no glycan passed the cut
        images[0].save(
            pdf_path, "PDF", resolution=100.0, save_all=True, append_images=images[1:]
        )
finally:
    # Release the file handles held by the opened page images.
    for page_image in images:
        page_image.close()

In [41]:
# ---- IgE: choose the glycans to plot ----
isotype = "IgE"
isotype_rows = calc_responses.loc[calc_responses.isotype == isotype]

# Glycans of this isotype in calc_responses' row order (that frame is sorted by
# descending overall_response_rate when it is built).
glycan_ordering = (
    isotype_rows[['glycan_m_number', 'overall_response_rate']]
    .drop_duplicates()
    .glycan_m_number
    .tolist()
)

# Largest single-visit responder count per glycan, and which glycans pass the cut.
# NOTE(review): `> 3` keeps glycans with at least 4 responders at some visit,
# while the plot titles and file names say ">=3" / "min3" — confirm which is intended.
max_responders = isotype_rows.groupby(['isotype', 'glycan_m_number']).count_of_responses.max()
qualifies = max_responders > 3
K = int(qualifies.sum())

# Keep exactly the glycans that pass the cut, preserving the response-rate order.
# (Slicing the first K entries of the ordering would instead keep the K glycans
# with the highest overall rate, which is not necessarily the same set.)
selected_glycans = set(qualifies[qualifies].index.get_level_values('glycan_m_number'))
glycan_ordering = [name for name in glycan_ordering if name in selected_glycans]

In [2]:
# Box plots of responder signal per visit for the glycans in `glycan_ordering`,
# 12 panels per page, one PNG per page. `isotype` and `glycan_ordering` must
# already be set by the selection cell that precedes this one.
N = 4  # panel rows per page
M = 3  # panel columns per page
visitnos = [2.,  6.,  8., 10., 12.]

t = 0  # page counter; also used in the PNG file names
remaining_glycans = glycan_ordering
while len(remaining_glycans) > 0:
    t += 1

    fig, ax = plt.subplots(nrows=N, ncols=M, figsize=(9, N*4), sharey=True)
    for k, panel in enumerate(ax.flat):  # row-major, i.e. k = row*M + col
        if k >= len(remaining_glycans):
            # Fewer than N*M glycans left on the last page: hide the unused panel.
            panel.set_axis_off()
            continue
        glycan_name = remaining_glycans[k]

        # Plot only rows flagged as responses, i.e. the same `> 100` rule that
        # count_of_responses is built from.
        g = calc_responses.loc[
            (calc_responses.glycan_m_number == glycan_name)
            & (calc_responses.isotype == isotype)
            & calc_responses['response_flag']
        ]
        data = [list(g.loc[g.visitno == v].centered_mean) for v in visitnos]
        tick_labels = [f"N={len(values)}\nV{v}" for values, v in zip(data, visitnos)]

        panel.boxplot(data)
        panel.set_xticks(ticks=np.arange(1, len(visitnos) + 1), labels=tick_labels)
        panel.set_yscale("log")
        panel.set_yticks(useticks[isotype])

        # Long glycan names are broken onto several lines at spaces.
        title = glycan_name.replace(' ', '\n') if len(glycan_name) > 20 else f"{glycan_name}"
        panel.set_title(title, fontsize=10)
        if k % M == 0:  # first column only
            panel.set_ylabel("centered mean of\nbackground-normalized signal")

    fig.tight_layout()
    fig.suptitle(f"{isotype} - glycans w/ >=3 responders", fontsize=14, y=1.005)
    fig.savefig(os.path.join(savedir, f"{isotype}_boxplots{t}_min3.png"), dpi=320, format='png', transparent=False, bbox_inches='tight', pad_inches=0.3)
    remaining_glycans = remaining_glycans[N*M:]

In [43]:
# Page images to bundle, in order: IgG pages 1-3 followed by IgE pages 1-2.
pages_per_isotype = (("IgG", 3), ("IgE", 2))
imgs = [
    f"{iso}_boxplots{page}_min3.png"
    for iso, n_pages in pages_per_isotype
    for page in range(1, n_pages + 1)
]

In [44]:
# Bundle the page PNGs listed in `imgs` into a single multi-page PDF.
images = [Image.open(savedir + f) for f in imgs]

pdf_path = savedir + "hvtn_glycan_boxplots_min3.pdf"

try:
    if images:  # nothing to write when no page images are listed
        images[0].save(
            pdf_path, "PDF", resolution=100.0, save_all=True, append_images=images[1:]
        )
finally:
    # Image.open keeps the underlying file open; release every handle once
    # the PDF has been written (or the write has failed).
    for page_image in images:
        page_image.close()

In [None]:
# ---- IgG, "no truncation": per-visit box plots of ALL centered means (no responder filter) ----
isotype = "IgG"
isotype_rows = calc_responses.loc[calc_responses.isotype == isotype]

# Glycans of this isotype in calc_responses' row order (that frame is sorted by
# descending overall_response_rate when it is built).
glycan_ordering = (
    isotype_rows[['glycan_m_number', 'overall_response_rate']]
    .drop_duplicates()
    .glycan_m_number
    .tolist()
)

# Largest single-visit responder count per glycan, and which glycans pass the cut.
# NOTE(review): `> 3` keeps glycans with at least 4 responders at some visit,
# while the figure title and file names say ">=3" / "min3" — confirm which is intended.
max_responders = isotype_rows.groupby(['isotype', 'glycan_m_number']).count_of_responses.max()
qualifies = max_responders > 3
K = int(qualifies.sum())

# Keep exactly the glycans that pass the cut, preserving the response-rate order.
# (Slicing the first K entries of the ordering would instead keep the K glycans
# with the highest overall rate, which is not necessarily the same set.)
selected_glycans = set(qualifies[qualifies].index.get_level_values('glycan_m_number'))
glycan_ordering = [name for name in glycan_ordering if name in selected_glycans]

N = 4  # panel rows per page
M = 3  # panel columns per page
visitnos = [2.,  6.,  8., 10., 12.]

t = 0  # page counter; also used in the PNG file names
remaining_glycans = glycan_ordering
while len(remaining_glycans) > 0:
    t += 1

    fig, ax = plt.subplots(nrows=N, ncols=M, figsize=(9, N*3.8), sharey=True)
    for k, panel in enumerate(ax.flat):  # row-major, i.e. k = row*M + col
        if k >= len(remaining_glycans):
            # Fewer than N*M glycans left on the last page: hide the unused panel.
            panel.set_axis_off()
            continue
        glycan_name = remaining_glycans[k]

        # Every row for this glycan/isotype is plotted, responder or not.
        g = calc_responses.loc[
            (calc_responses.glycan_m_number == glycan_name)
            & (calc_responses.isotype == isotype)
        ]
        data = [list(g.loc[g.visitno == v].centered_mean) for v in visitnos]
        tick_labels = [f"N={len(values)}\nV{v}" for values, v in zip(data, visitnos)]

        panel.boxplot(data)
        panel.set_xticks(ticks=np.arange(1, len(visitnos) + 1), labels=tick_labels)
        # NOTE(review): a log axis cannot show non-positive values; if any
        # centered mean is <= 0 it will not be drawn faithfully — confirm
        # that is acceptable for the unfiltered view.
        panel.set_yscale("log")

        # Long glycan names are broken onto several lines at spaces.
        title = glycan_name.replace(' ', '\n') if len(glycan_name) > 20 else f"{glycan_name}"
        panel.set_title(title, fontsize=10)
        if k % M == 0:  # first column only
            panel.set_ylabel("centered mean of\nbackground-normalized signal")

    fig.tight_layout()
    fig.suptitle(f"{isotype} - glycans w/ >=3 responders - no truncation", fontsize=14, y=1.005)
    # Written under a distinct "_no_truncation" name so these pages do not
    # overwrite the responder-only "{isotype}_boxplots{t}_min3.png" pages that
    # are bundled into the PDFs.
    fig.savefig(os.path.join(savedir, f"{isotype}_boxplots{t}_min3_no_truncation.png"), dpi=320, format='png', transparent=False, bbox_inches='tight', pad_inches=0.3)
    remaining_glycans = remaining_glycans[N*M:]

In [None]:
# Box plots of all centered means (no responder filter) per visit for the
# glycans in `glycan_ordering`, twelve panels per figure, one PNG per figure.
# `isotype` and `glycan_ordering` are taken from whichever cell set them last.
t = 0
remaining_glycans = glycan_ordering
while len(remaining_glycans):
    t += 1
    N = 4
    M = 3
    visitnos = [2.,  6.,  8., 10., 12.]

    fig, ax = plt.subplots(nrows=N, ncols=M, figsize=(9,N*4), sharey=True)

    # Fill the grid row by row; panels beyond the remaining glycans stay empty.
    for panel_idx, glycan in enumerate(remaining_glycans[:N*M]):
        panel = ax[divmod(panel_idx, M)]

        rows = calc_responses.loc[
            (calc_responses.isotype==isotype) & (calc_responses.glycan_m_number==glycan)
        ]
        per_visit = [list(rows.loc[rows.visitno==v].centered_mean) for v in visitnos]
        tick_text = [f"N={len(values)}\nV{v}" for values, v in zip(per_visit, visitnos)]

        panel.boxplot(per_visit)
        panel.set_xticks(ticks=np.arange(1,6), labels=tick_text)
        panel.set_yscale("log")

        # Names longer than 20 characters are wrapped at every space.
        heading = glycan.replace(' ','\n') if len(glycan) > 20 else f"{glycan}"
        panel.set_title(heading, fontsize=10)

        # Only the first column carries the y-axis label.
        if panel_idx % M == 0:
            panel.set_ylabel("centered mean of\nbackground-normalized signal")

    fig.tight_layout()
    plt.suptitle(f"{isotype} - glycans w/ >=3 responders, no truncation", fontsize=14, y=1.005)
    plt.savefig(savedir + f"{isotype}_boxplots{t}_min3_no_truncation.png", dpi=320, format='png', transparent=False, bbox_inches='tight', pad_inches=0.3)
    remaining_glycans = remaining_glycans[N*M:]