This repository has been archived by the owner on Nov 9, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 269
/
shared_phylotypes.py
81 lines (59 loc) · 2.68 KB
/
shared_phylotypes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#!/usr/bin/env python
# File created on 12 Aug 2010
from __future__ import division
__author__ = "Jens Reeder"
__copyright__ = "Copyright 2011, The QIIME Project"
__credits__ = ["Jens Reeder, Justin Kuczynski", "Daniel McDonald"]
__license__ = "GPL"
__version__ = "1.9.0"
__maintainer__ = "Jose Clemente"
__email__ = "jose.clemente@gmail.com"
"""Computes shared phylotypes between samples"""
from numpy import logical_and, zeros, ones
from qiime.format import format_distance_matrix
def _calc_shared_phylotypes_pairwise(otu_table, i, j):
    """Count the OTUs that are present in both sample i and sample j.

    otu_table: OTU table object; must provide data(sample_id, 'sample')
    i: id of the first sample in the OTU table
    j: id of the second sample in the OTU table

    Returns the number of positions at which both samples' data vectors
    are non-zero.
    """
    first_sample = otu_table.data(i, 'sample')
    second_sample = otu_table.data(j, 'sample')

    # An OTU is shared when it has a non-zero entry in both samples.
    present_in_both = logical_and(first_sample, second_sample)
    return present_in_both.sum()
def _calc_shared_phylotypes_multiple(otu_table, idxs):
    """Calculate shared otus between several samples listed in idxs.

    otu_table: OTU table object; must provide ids(axis='observation') and
     data(sample_id, 'sample')
    idxs: list of sample ids in the OTU table (at least two)

    Returns the number of observations that are non-zero in every one of
    the given samples.

    Raises ValueError if fewer than two sample ids are given.
    """
    if len(idxs) < 2:
        raise ValueError("calc_shared_phylotypes_multiple needs at least two "
                         "sampleIDs to compare")

    # Start with every observation marked as shared, then knock out the
    # ones that are absent (zero) in any of the requested samples.
    shared_phylos = ones(len(otu_table.ids(axis='observation')))
    for id_ in idxs:
        shared_phylos = logical_and(shared_phylos,
                                    otu_table.data(id_, 'sample'))
    return shared_phylos.sum()
def calc_shared_phylotypes(otu_table, reference_sample=None):
    """Build the matrix of shared phylotype counts for every sample pair.

    otu_table: OTU table object; its ids() are used as the sample ids
    reference_sample: if set, each entry counts the OTUs shared between the
     pair of samples AND this reference sample. Useful, e.g. when the
     reference sample is the Donor in a transplant study

    Returns the string produced by format_distance_matrix for the sample
    ids and the symmetric count matrix, with a trailing newline appended.
    """
    sample_ids = otu_table.ids()
    n_samples = len(sample_ids)
    counts = zeros((n_samples, n_samples), dtype=int)

    for row, row_id in enumerate(sample_ids):
        # Only the lower triangle (diagonal included) is computed; each
        # value is mirrored into the upper triangle below.
        for col in range(row + 1):
            col_id = sample_ids[col]
            if reference_sample:
                shared = _calc_shared_phylotypes_multiple(
                    otu_table, [row_id, col_id, reference_sample])
            else:
                shared = _calc_shared_phylotypes_pairwise(
                    otu_table, row_id, col_id)
            counts[row, col] = shared
            counts[col, row] = shared

    return format_distance_matrix(sample_ids, counts) + "\n"