In [45]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import os

In [46]:
from scipy.stats import gmean

In [47]:
pd.options.mode.chained_assignment = None

In [48]:
filepath_isotypes = '/networks/cavd/VDCs/Schief/Schief_856-G002/SkinReactions/data/Glycan_array_Scripps/processed_data/DRAFT_CAVD_G002_Glycan_Microarray_data_processed_2024-10-16.txt'
df_glycan_isotypes = pd.read_csv(filepath_isotypes, sep="\t")

usecols = ['sample_id',
           'isotype', 
           'ptid', 
           'study_week',
           'spot_name', 
           'glycan_m_number', 
           'background_subtraced_mean_signal']

df = df_glycan_isotypes[usecols]
df.sample_id = df.sample_id.astype(str)

def centered_mean(x):
    if len(x) >= 6:
        return np.mean(np.sort(x)[1:-1])
    else:
        return np.mean(x)

df['centered_mean'] = df.groupby(['isotype','sample_id','glycan_m_number'])[['background_subtraced_mean_signal']].transform(centered_mean)
df = df.drop(columns='background_subtraced_mean_signal').drop_duplicates()

  df_glycan_isotypes = pd.read_csv(filepath_isotypes, sep="\t")


In [57]:
df_floor = df.copy()
df_floor.loc[df_floor.centered_mean < 100,'centered_mean'] = 100

df_floor['mean_response'] = df_floor.groupby(['ptid','isotype','study_week'])[['centered_mean']].transform('mean')
df_floor['geometric_mean_response'] = df_floor.groupby(['ptid','isotype','study_week'])[['centered_mean']].transform(lambda x: gmean(x))

summary_response = df_floor[['ptid','isotype','study_week','mean_response','geometric_mean_response']].drop_duplicates().reset_index(drop=True)
summary_response['timept'] = summary_response.study_week.map({
    'Wk 0':0,
    'Wk 8':1,
    'Wk 10':2
})
summary_response = summary_response.sort_values(by='timept')

## hvtn ---- ##

filepath_hvtn = '/trials/vaccine/p302/s001/qdata/LabData/AE_assays_pass-through/Glycan_array/processed_by_sdmc/INTERIM_HVTN302_Glycan_Data_Processed_2024-12-03.txt'
hvtn_df = pd.read_csv(filepath_hvtn, sep="\t")

hvtn_df = hvtn_df[['guspec','isotype','ptid','visitno','spot_name','glycan_m_number','background_subtraced_mean_signal']]
hvtn_df['centered_mean'] = hvtn_df.groupby(['isotype','guspec','glycan_m_number'])[['background_subtraced_mean_signal']].transform(centered_mean)
hvtn_df = hvtn_df.drop(columns='background_subtraced_mean_signal').drop_duplicates()

hvtn_df_floor = hvtn_df.copy()
hvtn_df_floor.loc[hvtn_df_floor.centered_mean < 100,'centered_mean'] = 100


In [1]:
fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(10,3))

isotypes = ['IgG','IgE','IgM']
for i in range(3):
    iso = isotypes[i]
    for ppt in summary_response.ptid.unique():
        select = (summary_response.ptid==ppt) & (summary_response.isotype==iso)
        ax[i].plot(summary_response.loc[select].study_week, summary_response.loc[select].mean_response)
    ax[i].set_title(f"{iso}")
    ax[i].set_yscale("log", base=10)

ax[0].set_ylabel("Mean background-normalized\nsignal, avgd over glycans")

txt="Truncated data at 100, then took average over glycans per isotype/ppt/timept."
plt.figtext(0.5, -0.07, txt, wrap=True, horizontalalignment='center', fontsize=9)
plt.suptitle("Average response per ppt, over all glycans", y=1.03)
plt.tight_layout()
plt.show()

## ------------------------------------------------------------- ##
fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(10,3))

isotypes = ['IgG','IgE','IgM']
for i in range(3):
    iso = isotypes[i]
    for ppt in summary_response.ptid.unique():
        select = (summary_response.ptid==ppt) & (summary_response.isotype==iso)
        ax[i].plot(summary_response.loc[select].study_week, summary_response.loc[select].geometric_mean_response)
    ax[i].set_title(f"{iso}")
    ax[i].set_yscale("log", base=10)

ax[0].set_ylabel("Geometric mean of mean background-\nnormalized signal, avgd over glycans")

txt="Truncated data at 100, then took geometric mean over glycans per isotype/ppt/timept."
plt.figtext(0.5, -0.07, txt, wrap=True, horizontalalignment='center', fontsize=9)
plt.suptitle("Average response per ppt, over all glycans", y=1.03)
plt.tight_layout()
plt.show()

In [44]:
? gmean

[0;31mSignature:[0m
 [0mgmean[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0ma[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0maxis[0m[0;34m=[0m[0;36m0[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mdtype[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mweights[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0;34m*[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mnan_policy[0m[0;34m=[0m[0;34m'propagate'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mkeepdims[0m[0;34m=[0m[0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
Compute the weighted geometric mean along the specified axis.

The weighted geometric mean of the array :math:`a_i` associated to weights
:math:`w_i` is:

.. math::

    \exp \left( \frac{ \sum_{i=1}^n w_i \ln a_i }{ \sum_{i=1}^n w_i }
               \right) \, ,

and, with equal weights, it gives:

.. math::

    \sqrt[n]{ \prod_{i=1}^n a_i } \, .

Paramete

In [None]:
# df["gmean"] = df.apply(gmean, axis=1)
