In [1]:
import pandas as pd


In [2]:
# Tab-separated table, read with its first column as the row index.
# Per the outputs further down, the row labels are sample IDs and the
# column labels are OTU taxonomy strings.
fname = '../../data/clean/rosen.otu_table.rel_abun.clean'
df = pd.read_csv(fname, sep='\t', index_col=0)

In [23]:
# Two equal-length lists of row labels to try the pairing logic on:
# the first five rows, and rows 10-14 taken in reverse order.
# `o` is the first OTU column, used as the worked example below.
o = df.columns[0]
s1smpls = df.index[:5]
s2smpls = df.index[10:15][::-1]
print(s1smpls)
print(s2smpls)

Index([u'01-112-7GI', u'01-112-7RI', u'01-112-7TI', u'01-164-7GI',
       u'01-164-7RI'],
      dtype='object')
Index([u'01-209-2G', u'01-208-8T', u'01-200-1TI', u'01-200-1TF',
       u'01-200-1SF'],
      dtype='object')


In [24]:
# Position-by-position "non-zero in both lists" flags for OTU `o`.
[i and j
 for i, j in zip(df.loc[s1smpls, o].gt(0).values,
                 df.loc[s2smpls, o].gt(0).values)]

[True, False, False, False, False]

In [25]:
# Presence (value > 0) of OTU `o` in each s1 sample.
df.loc[s1smpls, o].gt(0)

01-112-7GI     True
01-112-7RI     True
01-112-7TI    False
01-164-7GI     True
01-164-7RI     True
Name: k__Bacteria;p__Firmicutes;c__Negativicutes;o__Selenomonadales;f__Acidaminococcaceae;g__Phascolarctobacterium;s__;d__denovo58, dtype: bool

In [26]:
# Presence (value > 0) of OTU `o` in each s2 sample.
df.loc[s2smpls, o].gt(0)

01-209-2G      True
01-208-8T     False
01-200-1TI     True
01-200-1TF    False
01-200-1SF    False
Name: k__Bacteria;p__Firmicutes;c__Negativicutes;o__Selenomonadales;f__Acidaminococcaceae;g__Phascolarctobacterium;s__;d__denovo58, dtype: bool

In [37]:
def calculate_exchange_prevalence(col, s1smpls, s2smpls):
    """
    Calculate the exchange/sharedness value for abundances in col
    between s1smpls and s2smpls.

    Parameters
    ----------
    col : pandas Series
        Per-sample values to calculate 'exchange' for, e.g. relative
        abundances of one OTU. Index should have at least s1smpls and
        s2smpls. Values should be 0 if the OTU is not present and greater
        than zero if it is.

    s1smpls, s2smpls : list-likes of index labels
        Samples to consider. Must be the same length and paired (i.e. the
        first sample in s1smpls comes from the same patient as the first
        sample in s2smpls.)

    Returns
    -------
    sharedness : float
        Fraction (between 0 and 1, not multiplied by 100) of (s1, s2)
        pairs where both samples are non-zero. In other words, the
        fraction of x, y points which are off the axes. NaN when there
        are no pairs at all.

    Raises
    ------
    ValueError
        If s1smpls and s2smpls have different lengths. Unequal lists
        cannot be paired, and silently truncating to the shorter one
        would divide the count by the wrong number of pairs.
    """
    n_pairs = len(s1smpls)
    if n_pairs != len(s2smpls):
        raise ValueError(
            's1smpls and s2smpls must have the same length '
            '(got {} and {})'.format(n_pairs, len(s2smpls)))
    if n_pairs == 0:
        # No pairs: the fraction is undefined rather than a division error.
        return float('nan')

    # Boolean presence flags, aligned by position (not by label).
    present_s1 = (col.loc[s1smpls] > 0).values
    present_s2 = (col.loc[s2smpls] > 0).values

    # float() keeps true division under Python 2 as well.
    return (present_s1 & present_s2).sum() / float(n_pairs)

In [38]:
# Sharedness of each of the first five OTU columns across the sample pairs.
# The result is already a Series indexed by OTU, so no transpose is needed.
df.iloc[:, :5].apply(
    lambda otu_col: calculate_exchange_prevalence(otu_col, s1smpls, s2smpls)
)

k__Bacteria;p__Firmicutes;c__Negativicutes;o__Selenomonadales;f__Acidaminococcaceae;g__Phascolarctobacterium;s__;d__denovo58      0.2
k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Clostridiales_Incertae_Sedis_XIII;g__Anaerovorax;s__;d__denovo1106    0.0
k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Lachnospiraceae;g__Ruminococcus2;s__;d__denovo57                      0.0
k__Bacteria;p__Actinobacteria;c__Actinobacteria;o__Actinomycetales;f__Actinomycetaceae;g__Actinomyces;s__;d__denovo886            0.2
k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Ruminococcaceae;g__;s__;d__denovo887                                  0.0
dtype: float64

In [44]:
# First five OTU columns for the s1 samples (note: tmp2 holds s1, tmp1 holds s2).
tmp2 = df.iloc[:, :5].loc[s1smpls]

In [45]:
# First five OTU columns for the s2 samples.
tmp1 = df.iloc[:, :5].loc[s2smpls]

In [47]:
# Row-wise stack of the two five-sample slices; only the shape is inspected.
pd.concat([tmp1, tmp2]).shape

(10, 5)

In [48]:
# First row label of tmp1; tmp1 was built from s2smpls, so this is s2smpls[0].
tmp1.index[0]


'01-209-2G'

# Scratch work for labeling exchanged OTUs

In [25]:
import pandas as pd
import numpy as np
from statsmodels.sandbox.stats.multicomp import multipletests


# Load the tab-separated exchange table and add a 'site1-site2' label
# column so each row can be grouped by the pair of sites it compares.
fname = '../../data/analysis/exchange.with_partial_corrs.txt'
df = pd.read_csv(fname, sep='\t').assign(
    site_comparison=lambda tbl: tbl['site1'] + '-' + tbl['site2'])

# Thresholds used by the cells below: sample count for the partial
# correlation, partial correlation coefficient, and corrected p-value.
n_thresh, r_partial, q_partial = 10, 0, 0.1

In [8]:
# Keep only rows with n_partial strictly above n_thresh. The .copy() makes
# the result an independent frame, so adding a column below neither raises
# SettingWithCopyWarning nor depends on whether query() returned a view.
df = df.query('n_partial > @n_thresh').copy()

# multipletests returns (reject, corrected p-values, ...); only the
# Benjamini-Hochberg corrected p-values (element 1) are kept. Indexing the
# result avoids rebinding `_`, which IPython uses for the last cell output.
df['q_partial'] = multipletests(df['p_partial'], method='fdr_bh')[1]
df.head()

Unnamed: 0,otu,site1,site2,site3,r_site12,p_site12,n_site12,r_partial,p_partial,n_partial,site_comparison,q_partial
4,k__Bacteria;p__;c__;o__;f__;g__;s__;d__denovo364,bal,gastric_fluid,throat_swab,0.633498,0.000225,29,0.660447,0.001,21,bal-gastric_fluid,0.003688
17,k__Bacteria;p__Actinobacteria;c__Actinobacteri...,gastric_fluid,throat_swab,bal,0.226411,0.177817,37,0.21048,0.174,24,gastric_fluid-throat_swab,0.248675
18,k__Bacteria;p__Actinobacteria;c__Actinobacteri...,bal,gastric_fluid,throat_swab,0.56218,0.001224,30,0.335027,0.0875,20,bal-gastric_fluid,0.146402
19,k__Bacteria;p__Actinobacteria;c__Actinobacteri...,bal,throat_swab,gastric_fluid,0.1867,0.332174,29,0.06833,0.3755,25,bal-throat_swab,0.456725
20,k__Bacteria;p__Actinobacteria;c__Actinobacteri...,gastric_fluid,throat_swab,bal,0.462177,1e-06,99,0.471868,0.0,67,gastric_fluid-throat_swab,0.0


In [10]:
# Rows counted as "exchanged": enough samples, positive partial correlation,
# and significant after FDR correction. The cut-offs come from the
# n_thresh / r_partial / q_partial variables defined with the data load,
# so changing a threshold there changes it here too. Inside query(), a bare
# name is a column and an @-prefixed name is the Python variable.
# NOTE(review): df was already filtered with a strict `n_partial > n_thresh`,
# so the `>=` clause here cannot drop anything; confirm which is intended.
exchange = df.query(
    'n_partial >= @n_thresh and r_partial > @r_partial and q_partial < @q_partial'
)

In [27]:
# Exchanged bugs as a wide table: one row per OTU, one column per site
# comparison, partial correlation as the value (NaN where not exchanged).
exchange.pivot(
    index='otu',
    columns='site_comparison',
    values='r_partial'
).head()

site_comparison,bal-gastric_fluid,bal-throat_swab,gastric_fluid-throat_swab
otu,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
k__Bacteria;p__;c__;o__;f__;g__;s__;d__denovo364,0.660447,,
k__Bacteria;p__Actinobacteria;c__Actinobacteria;o__Actinomycetales;f__Actinomycetaceae;g__Actinomyces;s__;d__denovo60,,,0.471868
k__Bacteria;p__Actinobacteria;c__Actinobacteria;o__Actinomycetales;f__Actinomycetaceae;g__Actinomyces;s__;d__denovo77,0.528483,,0.366272
k__Bacteria;p__Actinobacteria;c__Actinobacteria;o__Actinomycetales;f__Actinomycetaceae;g__Actinomyces;s__;d__denovo82,,,0.441879
k__Bacteria;p__Actinobacteria;c__Actinobacteria;o__Actinomycetales;f__Micrococcaceae;g__Rothia;s__;d__denovo160,,,0.827167


In [26]:
# Same wide table, reduced to flags: 1 where the OTU is exchanged for that
# site comparison, NaN where it is not.
(exchange
    .pivot(index='otu', columns='site_comparison', values='r_partial')
    .head()
    .notnull()
    .astype(int)
    .replace(0, np.nan))

site_comparison,bal-gastric_fluid,bal-throat_swab,gastric_fluid-throat_swab
otu,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
k__Bacteria;p__;c__;o__;f__;g__;s__;d__denovo364,1.0,,
k__Bacteria;p__Actinobacteria;c__Actinobacteria;o__Actinomycetales;f__Actinomycetaceae;g__Actinomyces;s__;d__denovo60,,,1.0
k__Bacteria;p__Actinobacteria;c__Actinobacteria;o__Actinomycetales;f__Actinomycetaceae;g__Actinomyces;s__;d__denovo77,1.0,,1.0
k__Bacteria;p__Actinobacteria;c__Actinobacteria;o__Actinomycetales;f__Actinomycetaceae;g__Actinomyces;s__;d__denovo82,,,1.0
k__Bacteria;p__Actinobacteria;c__Actinobacteria;o__Actinomycetales;f__Micrococcaceae;g__Rothia;s__;d__denovo160,,,1.0
