In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
%matplotlib inline

In [7]:
titers = pd.read_csv('../../data/titers_katzelnick2015/dengue_titers.tsv', sep='\t')

# pull only the monovalent and NHP 3month vals
titers = titers.loc[titers['source'].isin(['monovalent', 'agm_3mo'])] 
#  group by virus & serum strains and average
titers = titers.groupby(['virus_strain', 'serum_strain'])['titer'].agg('mean').reset_index() 
# put on a log2 scale
titers['titer'] = titers['titer'].map(np.log2)

# find autologous titers
autologous_titers = {}
for idx, row in titers.iterrows():
    if row['virus_strain'] == row['serum_strain']:
        autologous_titers[row['virus_strain']] = row['titer']

# normalize titer values
normalized_with_autologous = 0
normalized_with_max = 0
discarded = 0
for idx, row in titers.iterrows():
    serum = row['serum_strain']
    if serum in autologous_titers: # try and use autologous titers to normalize
        row['titer'] = autologous_titers[serum] - row['titer']
        normalized_with_autologous += 1
    else: # if we have at least 10 values for that serum, default to the max titer
        normalized_with_max += 1
        serum_measurements = titers.loc[titers['serum_strain'] == serum]
        if len(serum_measurements) >= 10:
            autologous_proxy = max(serum_measurements['titer'].values)
            row['titer'] = autologous_proxy - row['titer']
            
        else: # if no autologous and few measurements, discard measurement
            discarded += 1
            row['titer'] = np.nan
titers = titers.loc[np.isfinite(titers['titer'])]

print 'normalized with autologous: %d'%normalized_with_autologous
print 'normalized with max: %d'%normalized_with_max
print 'discarded: %d'%discarded

normalized with autologous: 509
normalized with max: 40
discarded: 9


In [15]:
titers.to_csv('../../data/titers_katzelnick2015/normalized_titers.csv',index=False)