In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from PIL import Image
import os

import seaborn as sns

# Turn off pandas' chained-assignment check for the whole session, i.e. no
# SettingWithCopyWarning is emitted when a slice of a DataFrame is modified.
pd.options.mode.chained_assignment = None

In [5]:
# Tab-delimited export of the processed glycan microarray data.
filepath_isotypes = '/networks/cavd/VDCs/Schief/Schief_856-G002/SkinReactions/data/Glycan_array_Scripps/processed_data/DRAFT_CAVD_G002_Glycan_Microarray_data_processed_2024-10-16.txt'
df_glycan_isotypes = pd.read_csv(filepath_isotypes, sep="\t")

# Columns carried into the analysis. These names must match the file's header
# exactly (including the spelling of 'background_subtraced_mean_signal').
usecols = ['sample_id',
           'isotype',
           'ptid',
           'study_week',
           'spot_name',
           'glycan_m_number',
           'background_subtraced_mean_signal']

# Work on an explicit copy: the dtype change below then modifies an independent
# frame instead of a slice of df_glycan_isotypes.
df = df_glycan_isotypes[usecols].copy()
df['sample_id'] = df['sample_id'].astype(str)

  df_glycan_isotypes = pd.read_csv(filepath_isotypes, sep="\t")


In [6]:
def centered_mean(x):
    """Return the mean of ``x`` after discarding its two extreme values.

    When ``x`` holds six or more values it is sorted and the first and last
    sorted entries (smallest and largest) are dropped before averaging.
    Shorter inputs are averaged as they are.

    NOTE(review): missing values are not filtered out here; confirm that the
    inputs are NaN-free.
    """
    if len(x) < 6:
        return np.mean(x)
    ordered = np.sort(x)
    return np.mean(ordered[1:-1])

# Collapse the rows of each (isotype, sample, glycan) group into one value:
# every row first receives its group's centered mean, then the raw signal
# column is dropped so that rows which have become identical are removed.
replicate_keys = ['isotype', 'sample_id', 'glycan_m_number']
df = (
    df.assign(
        centered_mean=df.groupby(replicate_keys)[['background_subtraced_mean_signal']].transform(centered_mean)
    )
    .drop(columns='background_subtraced_mean_signal')
    .drop_duplicates()
    .reset_index(drop=True)
)

In [7]:
# A row counts as a response when its centered mean is above this cut-off.
threshold = 100

week_keys = ['isotype', 'study_week', 'glycan_m_number']
glycan_keys = ['isotype', 'glycan_m_number']


def response_fraction(flags):
    """Fraction of True entries among the response flags of one group."""
    return flags.sum()/len(flags)


calc_responses = df.copy()
calc_responses['response_flag'] = calc_responses.centered_mean > threshold

# Per isotype / study week / glycan: number and fraction of responding rows.
calc_responses["count_of_responses"] = (
    calc_responses.groupby(week_keys)[['response_flag']].transform('sum')
)
calc_responses["prop_of_responses"] = (
    calc_responses.groupby(week_keys)[['response_flag']].transform(response_fraction)
)

# Per isotype / glycan, across all study weeks; rows are then ordered by it,
# highest response rate first.
calc_responses["overall_response_rate"] = (
    calc_responses.groupby(glycan_keys)[['response_flag']].transform(response_fraction)
)
calc_responses = calc_responses.sort_values(by='overall_response_rate', ascending=False)

In [8]:
# Plot configuration shared by the boxplot cells.
isotypes = ['IgE', 'IgM', 'IgG']

# Candidate y-axis tick positions, keyed by isotype.
useticks = {
    "IgG": [100, 1_000, 10_000, 15_000],
    "IgE": [100, 1_000, 1_700],
    "IgM": [100, 1_000],
}

# (lower, upper) y-axis limits, keyed by isotype.
ylim = {
    'IgG': (1, 15_000),
    'IgE': (1, 1_700),
    'IgM': (1, 1_000),
}

# Output directory for figures; the trailing slash matters because file names
# are appended by plain string concatenation.
savedir = '/networks/vtn/lab/SDMC_labscience/operations/documents/templates/assay/template_testing/non_truncated_boxplots_12_05_2024/'

In [1]:
isotype = "IgG"
# calc_responses is sorted by overall_response_rate (descending) where it is
# built, so de-duplicating keeps the glycans in that order; keep the first 15.
glycan_ordering = calc_responses.loc[calc_responses.isotype==isotype,['glycan_m_number','overall_response_rate']].drop_duplicates().glycan_m_number.tolist()
glycan_ordering = glycan_ordering[:15]

N = 3  # subplot rows per figure
M = 5  # subplot columns per figure
weeks = ["Wk 0", "Wk 8", "Wk 10"]

t = 0  # page counter; becomes part of the output file name
remaining_glycans = glycan_ordering
while len(remaining_glycans)>0:
    t += 1

    # squeeze=False keeps `ax` two-dimensional whatever N and M are.
    fig, ax = plt.subplots(nrows=N, ncols=M, figsize=(9,N*2.5), sharey=True, squeeze=False)
    for i in range(N):
        for j in range(M):
            k = i*M + j
            panel = ax[i,j]
            if k >= len(remaining_glycans):
                # No glycan left for this slot: hide the empty frame.
                panel.set_visible(False)
                continue
            glycan = remaining_glycans[k]

            g = calc_responses.loc[(calc_responses.glycan_m_number==glycan) & (calc_responses.isotype==isotype)]
            # One list of centered means per study week, in `weeks` order.
            data = [list(g.loc[(g.study_week==w)].centered_mean) for w in weeks]
            # K = points above the response threshold, N = all points, per week.
            Ks = [str(int((np.asarray(vals) > threshold).sum())) for vals in data]
            Ns = [str(len(vals)) for vals in data]
            panel.boxplot(data)

            # Label each box with its own week. The label is indexed by week
            # position only, never by the subplot row index.
            tick_labels = [f"{Ks[w]}/{Ns[w]}\n{weeks[w]}" for w in range(len(weeks))]
            tick_labels[0] = "K/N:" + tick_labels[0]
            panel.set_xticks(ticks=np.arange(1, len(weeks) + 1), labels=tick_labels, fontsize=7.5)

            # NOTE(review): non-positive values have no position on a log axis
            # (matplotlib masks or clips them), yet they are still counted in
            # N. ylim[isotype] / useticks[isotype] are not applied in this cell.
            panel.set_yscale("log")
            panel.axhline(y = threshold, color = 'r', linestyle = '-', linewidth=0.5)

            # Break long glycan names onto several lines so titles do not overlap.
            title = str(glycan.replace(' ','\n')) if len(glycan) > 20 else f"{glycan}"
            panel.set_title(title, fontsize=10)
            if j==0:
                panel.set_ylabel("centered mean of\nbackground-normalized signal")

    fig.tight_layout()
    # NOTE(review): the ">=3 responders" criterion in the title is not computed
    # here; the selection is the hard-coded top-15 slice above. Confirm they agree.
    fig.suptitle(f"{isotype} - glycans w/ >=3 responders", fontsize=14, y=1.005)
    txt = f"K refers to total responders (points > {threshold}), N is total points visualized"
    fig.text(0.5, -0.04, txt, wrap=True, horizontalalignment='center', fontsize=9)
    # The PDF-assembly cell opens IgG_boxplots1_not_truncated.png from savedir,
    # so each page has to be written to disk.
    fig.savefig(savedir + f"{isotype}_boxplots{t}_not_truncated.png", dpi=320, format='png', transparent=False, bbox_inches='tight', pad_inches=0.3)
    # Advance by exactly one page worth of panels.
    remaining_glycans = remaining_glycans[N*M:]

In [2]:
isotype = "IgE"
# calc_responses is sorted by overall_response_rate (descending) where it is
# built, so de-duplicating keeps the glycans in that order; keep the first 9.
glycan_ordering = calc_responses.loc[calc_responses.isotype==isotype,['glycan_m_number','overall_response_rate']].drop_duplicates().glycan_m_number.tolist()
glycan_ordering = glycan_ordering[:9]

N = 2  # subplot rows per figure
M = 5  # subplot columns per figure
weeks = ["Wk 0", "Wk 8", "Wk 10"]

t = 0  # page counter; becomes part of the output file name
remaining_glycans = glycan_ordering
while len(remaining_glycans)>0:
    t += 1

    # squeeze=False keeps `ax` two-dimensional whatever N and M are.
    fig, ax = plt.subplots(nrows=N, ncols=M, figsize=(9,N*2.7), sharey=True, squeeze=False)
    for i in range(N):
        for j in range(M):
            k = i*M + j
            panel = ax[i,j]
            if k >= len(remaining_glycans):
                # No glycan left for this slot: hide the empty frame.
                panel.set_visible(False)
                continue
            glycan = remaining_glycans[k]

            g = calc_responses.loc[(calc_responses.glycan_m_number==glycan) & (calc_responses.isotype==isotype)]
            # One list of centered means per study week, in `weeks` order.
            data = [list(g.loc[(g.study_week==w)].centered_mean) for w in weeks]
            # K = points above the response threshold, N = all points, per week.
            Ks = [str(int((np.asarray(vals) > threshold).sum())) for vals in data]
            Ns = [str(len(vals)) for vals in data]
            panel.boxplot(data)

            # Label each box with its own week. The label is indexed by week
            # position only, never by the subplot row index.
            tick_labels = [f"{Ks[w]}/{Ns[w]}\n{weeks[w]}" for w in range(len(weeks))]
            tick_labels[0] = "K/N:" + tick_labels[0]
            panel.set_xticks(ticks=np.arange(1, len(weeks) + 1), labels=tick_labels, fontsize=7.5)

            # NOTE(review): non-positive values have no position on a log axis
            # (matplotlib masks or clips them), yet they are still counted in
            # N. ylim[isotype] / useticks[isotype] are not applied in this cell.
            panel.set_yscale("log")
            panel.axhline(y = threshold, color = 'r', linestyle = '-', linewidth=0.5)

            # Break long glycan names onto several lines so titles do not overlap.
            title = str(glycan.replace(' ','\n')) if len(glycan) > 20 else f"{glycan}"
            panel.set_title(title, fontsize=10)
            if j==0:
                panel.set_ylabel("centered mean of\nbackground-normalized signal")

    fig.tight_layout()
    txt = f"K refers to total responders (points > {threshold}), N is total points visualized"
    fig.text(0.5, -0.04, txt, wrap=True, horizontalalignment='center', fontsize=9)
    # NOTE(review): the ">=3 responders" criterion in the title is not computed
    # here; the selection is the hard-coded top-9 slice above. Confirm they agree.
    fig.suptitle(f"{isotype} - glycans w/ >=3 responders", fontsize=14, y=1.02)
    fig.savefig(savedir + f"{isotype}_boxplots{t}_not_truncated.png", dpi=320, format='png', transparent=False, bbox_inches='tight', pad_inches=0.3)
    # Advance by exactly one page worth of panels, so a longer glycan list is
    # neither repeated nor skipped across pages.
    remaining_glycans = remaining_glycans[N*M:]

In [3]:
isotype = "IgM"
# calc_responses is sorted by overall_response_rate (descending) where it is
# built, so de-duplicating keeps the glycans in that order; keep the first 2.
glycan_ordering = calc_responses.loc[calc_responses.isotype==isotype,['glycan_m_number','overall_response_rate']].drop_duplicates().glycan_m_number.tolist()
glycan_ordering = glycan_ordering[:2]

N = 1  # subplot rows per figure
M = 2  # subplot columns per figure
weeks = ["Wk 0", "Wk 8", "Wk 10"]

t = 0  # page counter; becomes part of the output file name
remaining_glycans = glycan_ordering
while len(remaining_glycans)>0:
    t += 1

    # squeeze=False keeps `ax` two-dimensional even with a single row, so
    # panels are always addressed as ax[i,j].
    fig, ax = plt.subplots(nrows=N, ncols=M, figsize=(6,N*3.5), sharey=True, squeeze=False)
    for i in range(N):
        for j in range(M):
            k = i*M + j
            panel = ax[i,j]
            if k >= len(remaining_glycans):
                # No glycan left for this slot: hide the empty frame.
                panel.set_visible(False)
                continue
            glycan = remaining_glycans[k]

            g = calc_responses.loc[(calc_responses.glycan_m_number==glycan) & (calc_responses.isotype==isotype)]
            # One list of centered means per study week, in `weeks` order.
            data = [list(g.loc[(g.study_week==w)].centered_mean) for w in weeks]
            # K = points above the response threshold, N = all points, per week.
            Ks = [str(int((np.asarray(vals) > threshold).sum())) for vals in data]
            Ns = [str(len(vals)) for vals in data]
            panel.boxplot(data)

            # Label each box with its own week. The label is indexed by week
            # position only, never by the subplot row index.
            tick_labels = [f"{Ks[w]}/{Ns[w]}\n{weeks[w]}" for w in range(len(weeks))]
            tick_labels[0] = "K/N:" + tick_labels[0]
            panel.set_xticks(ticks=np.arange(1, len(weeks) + 1), labels=tick_labels, fontsize=7.5)

            # NOTE(review): non-positive values have no position on a log axis
            # (matplotlib masks or clips them), yet they are still counted in N.
            panel.set_yscale("log")
            # NOTE(review): this restricts the y-axis to ylim[isotype] although
            # the output file is named "*_not_truncated"; confirm it is intended.
            panel.set_ylim(ylim[isotype])
            panel.axhline(y = threshold, color = 'r', linestyle = '-', linewidth=0.5)

            # Break long glycan names onto several lines so titles do not overlap.
            title = str(glycan.replace(' ','\n')) if len(glycan) > 20 else f"{glycan}"
            panel.set_title(title, fontsize=10)
            if j==0:
                panel.set_ylabel("centered mean of\nbackground-normalized signal")

    fig.tight_layout()
    # NOTE(review): the ">=3 responders" criterion in the title is not computed
    # here; the selection is the hard-coded top-2 slice above. Confirm they agree.
    fig.suptitle(f"{isotype} - glycans w/ >=3 responders", fontsize=14, y=1.05)
    txt = f"K refers to total responders (points > {threshold}), N is total points visualized"
    fig.text(0.5, -0.04, txt, wrap=True, horizontalalignment='center', fontsize=9)
    fig.savefig(savedir + f"{isotype}_boxplots{t}_not_truncated.png", dpi=320, format='png', transparent=False, bbox_inches='tight', pad_inches=0.3)
    # Advance by exactly one page worth of panels, so a longer glycan list is
    # not silently skipped.
    remaining_glycans = remaining_glycans[N*M:]

In [67]:
# Combine the first boxplot page of each isotype into one multi-page PDF,
# in the order IgG, IgE, IgM. The PNG files must already exist in savedir.
imgs = [f"{name}_boxplots1_not_truncated.png" for name in ['IgG','IgE','IgM']]

pdf_path = savedir + "G002_glycan_boxplots_not_truncated.pdf"

images = []
try:
    for f in imgs:
        images.append(Image.open(savedir + f))

    # The first image becomes page 1; the others are appended as further pages.
    images[0].save(
        pdf_path, "PDF", resolution=100.0, save_all=True, append_images=images[1:]
    )
finally:
    # Release the file handles held by PIL, also when opening or saving fails.
    for im in images:
        im.close()