In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
%matplotlib inline

In [2]:
# Read the tab-separated titer table.
titers = pd.read_csv('../../data/titers_katzelnick2015/dengue_titers.tsv', sep='\t')

# Keep only the monovalent and NHP 3-month measurements, and only the
# three columns the rest of the notebook works with.
wanted_sources = ['monovalent', 'agm_3mo']
wanted_columns = ['virus_strain', 'serum_strain', 'titer']
titers = titers.loc[titers['source'].isin(wanted_sources), wanted_columns]

In [3]:
# Sanity check on one virus/serum pair: show how many rows it has, the rows
# themselves, and the arithmetic mean of their titers.
demo_virus = 'DENV1/THAILAND/16007/1964'
demo_serum = 'DENV1/NAURU/WESTERNPACIFICDELTA30/1974'
demo = titers.loc[(titers['virus_strain'] == demo_virus) & (titers['serum_strain'] == demo_serum)]
# Single-argument print(...) produces the same output on Python 2 and Python 3,
# whereas the bare `print x` statement is a SyntaxError on Python 3.
print(len(demo))
print(demo)
print(demo['titer'].mean(axis=0))

9
                   virus_strain                            serum_strain  titer
74    DENV1/THAILAND/16007/1964  DENV1/NAURU/WESTERNPACIFICDELTA30/1974    756
75    DENV1/THAILAND/16007/1964  DENV1/NAURU/WESTERNPACIFICDELTA30/1974     54
76    DENV1/THAILAND/16007/1964  DENV1/NAURU/WESTERNPACIFICDELTA30/1974    117
77    DENV1/THAILAND/16007/1964  DENV1/NAURU/WESTERNPACIFICDELTA30/1974    161
78    DENV1/THAILAND/16007/1964  DENV1/NAURU/WESTERNPACIFICDELTA30/1974    399
79    DENV1/THAILAND/16007/1964  DENV1/NAURU/WESTERNPACIFICDELTA30/1974     66
80    DENV1/THAILAND/16007/1964  DENV1/NAURU/WESTERNPACIFICDELTA30/1974     93
81    DENV1/THAILAND/16007/1964  DENV1/NAURU/WESTERNPACIFICDELTA30/1974    194
1264  DENV1/THAILAND/16007/1964  DENV1/NAURU/WESTERNPACIFICDELTA30/1974     26
207.333333333


In [4]:
# Collapse to one row per (virus_strain, serum_strain) pair, holding the
# arithmetic mean of that pair's titers.
# NOTE: this cell reads and overwrites `titers`, so running it a second time
# would apply log2 to values that are already on the log2 scale.
# NOTE(review): the mean is taken on the raw scale and only then converted to
# log2 - confirm this (rather than averaging log2 values) is intended.
titers = titers.groupby(['virus_strain', 'serum_strain'], as_index=False)['titer'].mean()

# express the averaged titers in log2 units
titers['titer'] = np.log2(titers['titer'])

In [5]:
# Same virus/serum pair as the earlier sanity check, now looked up in the
# averaged, log2-scaled table.
demo_virus = 'DENV1/THAILAND/16007/1964'
demo_serum = 'DENV1/NAURU/WESTERNPACIFICDELTA30/1974'
demo = titers.loc[(titers['virus_strain'] == demo_virus) & (titers['serum_strain'] == demo_serum)]
# print(...) with one argument works on both Python 2 and Python 3.
print(demo)

                 virus_strain                            serum_strain  \
70  DENV1/THAILAND/16007/1964  DENV1/NAURU/WESTERNPACIFICDELTA30/1974   

       titer  
70  7.695808  


In [6]:
# Build a lookup from serum strain to its autologous titer, i.e. the titer
# of the row whose virus strain has the same name as the serum strain.
is_autologous = titers['virus_strain'] == titers['serum_strain']
autologous_titers = dict(zip(titers.loc[is_autologous, 'serum_strain'],
                             titers.loc[is_autologous, 'titer']))

In [7]:
# Spot-check one entry of the autologous lookup (raises KeyError if that serum
# has no autologous measurement). print(...) works on Python 2 and Python 3.
print(autologous_titers['DENV1/NAURU/WESTERNPACIFICDELTA30/1974'])

3.90689059561


In [8]:
# Normalize titer values: each measurement becomes
#     (reference titer for its serum) - (measured log2 titer)
# The reference is the serum's autologous titer when one exists. Otherwise,
# if the serum has enough measurements, its maximum titer is used as a proxy.
# Measurements with neither are dropped.

# minimum number of measurements a serum needs before its max titer is
# trusted as a stand-in for the missing autologous titer
MIN_MEASUREMENTS_FOR_MAX_PROXY = 10

# Per-serum summaries, computed once up front instead of re-filtering the
# whole `titers` frame for every row that lacks an autologous titer.
measurements_per_serum = titers.groupby('serum_strain').size()
max_titer_per_serum = titers.groupby('serum_strain')['titer'].max()

# bookkeeping for the summary printed at the end
normalized_with_autologous = 0
normalized_with_max = 0
discarded = 0

normalized_titers = []
for idx, row in titers.iterrows():
    serum = row['serum_strain']
    if serum in autologous_titers:  # try and use autologous titers to normalize
        normalized_titer_val = autologous_titers[serum] - row['titer']
        normalized_with_autologous += 1
    elif measurements_per_serum.get(serum, 0) >= MIN_MEASUREMENTS_FOR_MAX_PROXY:
        # no autologous titer, but enough values for this serum: default to the max titer
        autologous_proxy = max_titer_per_serum[serum]
        normalized_titer_val = autologous_proxy - row['titer']
        normalized_with_max += 1
    else:  # no autologous titer and too few measurements: discard measurement
        discarded += 1
        continue
    normalized_titers.append({'virus': row['virus_strain'], 'serum': serum, 'titer': normalized_titer_val})

# one record per kept measurement -> DataFrame, built in a single step
normalized_titers = pd.DataFrame(normalized_titers)

# print(...) with a single pre-formatted string works on Python 2 and Python 3
print('normalized with autologous: %d' % normalized_with_autologous)
print('normalized with max: %d' % normalized_with_max)
print('discarded: %d' % discarded)

normalized with autologous: 509
normalized with max: 31
discarded: 9


In [12]:
# Export the normalized titers. The row index is an uninformative counter, so
# it is left out; `index` is documented as a bool, hence False rather than None.
normalized_titers.to_csv('../../data/titers_katzelnick2015/normalized_titers.csv', index=False)

In [15]:
# `titers` holds the averaged log2 titers *before* normalization, so it gets
# its own file. Writing it to normalized_titers.csv (as this cell used to)
# overwrote the normalized table exported by the previous cell whenever the
# notebook was run top to bottom.
# NOTE(review): the file name log2_titers.csv is a new choice - point any
# downstream reader that expected the un-normalized values at it.
titers.to_csv('../../data/titers_katzelnick2015/log2_titers.csv', index=False)