This repository has been archived by the owner on Nov 9, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 269
/
compute_taxonomy_ratios.py
81 lines (64 loc) · 2.36 KB
/
compute_taxonomy_ratios.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
"""Methods for computing microbial diversity indices
The basis for this library comes from Gevers et al. 2014
(http://www.ncbi.nlm.nih.gov/pubmed/24629344) in which a microbial dysbiosis
index was created based on observed increases and decreases in organisms with
respect to Crohn's disease.
"""
from __future__ import division
__author__ = "Daniel McDonald"
__copyright__ = "Copyright 2014, The QIIME project"
__credits__ = ["Daniel McDonald"]
__license__ = "GPL"
__version__ = "1.9.0"
__maintainer__ = "Daniel McDonald"
__email__ = "mcdonadt@colorado.edu"
from numpy import log, nan, allclose, inf
def compute_index(table, increased, decreased, key):
"""Compute a per-sample index
Parameters
----------
table : biom.Table
A biom table that has information associated with `key`, such as
taxonomy.
increased : set
A set of items that have been observed to have increased
decreased : set
A set of items that have been observed to have decreased
key : str
The metadata key to use for the computation of the index.
Returns
-------
generator
(sample_id, index_score)
Raises
------
KeyError
If the key isn't present
ValueError
If none of the increased or decreased items exist in the table
Notes
-----
Yields `nan` if the decreased count is 0.
"""
md_test = table.metadata(axis='observation')[0]
if key not in md_test:
raise KeyError("%s is not present, the following are present: %s" %
(key, ','.join(md_test)))
inc_f = lambda v, i, md: set(md[key]) & increased
dec_f = lambda v, i, md: set(md[key]) & decreased
inc_t = table.filter(inc_f, axis='observation', inplace=False)
dec_t = table.filter(dec_f, axis='observation', inplace=False)
if inc_t.is_empty():
raise ValueError("None of the increased items were found")
if dec_t.is_empty():
raise ValueError("None of the decreased items were found")
ids_in_common = set(inc_t.ids()) & set(dec_t.ids())
for id_ in ids_in_common:
inc_count = inc_t.data(id_, dense=False).sum()
dec_count = dec_t.data(id_, dense=False).sum()
if allclose(dec_count, 0):
yield (id_, nan)
elif allclose(inc_count, 0):
yield (id_, -inf)
else:
yield (id_, log(inc_count / dec_count))