This repository has been archived by the owner on Nov 9, 2023. It is now read-only.
/
shared_phylotypes.py
81 lines (62 loc) · 2.96 KB
/
shared_phylotypes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#!/usr/bin/env python
# File created on 12 Aug 2010
"""Computes shared phylotypes between samples"""
# The docstring above must be the first statement of the module to be stored
# in __doc__; a __future__ import is allowed to follow it.
from __future__ import division

# Module metadata.
__author__ = "Jens Reeder"
__copyright__ = "Copyright 2011, The QIIME Project"
# One list entry per contributor.
__credits__ = ["Jens Reeder", "Justin Kuczynski", "Daniel McDonald"]
__license__ = "GPL"
__version__ = "1.6.0"
__maintainer__ = "Jose Clemente"
__email__ = "jose.clemente@gmail.com"
__status__ = "Release"
from biom.parse import parse_biom_table
from numpy import logical_and, zeros, ones
from qiime.format import format_distance_matrix
def _calc_shared_phylotypes_pairwise(otu_table,i,j):
    """Count the OTUs that are present in both sample i and sample j.

    otu_table: OTU table object providing sampleData(sample_id)
    i: a sample id in the OTU table
    j: a sample id in the OTU table

    Returns the number of positions where both samples' data are non-zero.
    """
    first_sample = otu_table.sampleData(i)
    second_sample = otu_table.sampleData(j)
    # An OTU counts as shared only when it is non-zero in both samples.
    present_in_both = logical_and(first_sample, second_sample)
    return present_in_both.sum()
def _calc_shared_phylotypes_multiple(otu_table, idxs):
    """Calculate the number of OTUs shared by all samples listed in idxs.

    otu_table: OTU table object providing ObservationIds and
               sampleData(sample_id)
    idxs: list of sample ids in the OTU table (at least two)

    Returns the number of observations that are non-zero in every listed
    sample.
    Raises ValueError if fewer than two sample ids are given.
    """
    if len(idxs) < 2:
        # Call-style raise: valid syntax on both Python 2 and Python 3.
        raise ValueError("calc_shared_phylotypes_multiple needs at least two "
                         "sampleIDs to compare")

    # Start with every observation marked as shared, then knock out those
    # that are absent (zero) in any of the requested samples.
    shared_phylos = ones(len(otu_table.ObservationIds), dtype=bool)
    for id_ in idxs:
        shared_phylos = logical_and(shared_phylos, otu_table.sampleData(id_))
    return shared_phylos.sum()
def calc_shared_phylotypes(infile, reference_sample=None):
    """Build the matrix of shared phylotype counts for every pair of samples.

    infile: otu table filehandle, parsed with parse_biom_table
    reference_sample: optional sample name; when set, each cell holds the
                      number of OTUs shared by the pair of samples AND the
                      reference sample (e.g. the Donor in a transplant
                      study)

    Returns the symmetric count matrix formatted by format_distance_matrix,
    followed by a newline.
    """
    otu_table = parse_biom_table(infile)
    sample_ids = otu_table.SampleIds
    num_samples = len(sample_ids)
    result_array = zeros((num_samples, num_samples), dtype=int)

    # Only the lower triangle (including the diagonal) is computed; every
    # value is mirrored into the upper triangle.
    for row, row_id in enumerate(sample_ids):
        for col in range(row + 1):
            col_id = sample_ids[col]
            if reference_sample:
                shared = _calc_shared_phylotypes_multiple(
                    otu_table, [row_id, col_id, reference_sample])
            else:
                shared = _calc_shared_phylotypes_pairwise(
                    otu_table, row_id, col_id)
            result_array[row, col] = shared
            result_array[col, row] = shared

    return format_distance_matrix(sample_ids, result_array) + "\n"