This repository has been archived by the owner on Nov 9, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 269
/
make_otu_table.py
73 lines (63 loc) · 2.56 KB
/
make_otu_table.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#!/usr/bin/env python
#make_otu_table: makes sample x OTU table
__author__ = "Rob Knight"
__copyright__ = "Copyright 2011, The QIIME Project"
__credits__ = ["Rob Knight", "Justin Kuczynski"] #remember to add yourself
__license__ = "GPL"
__version__ = "1.3.0"
__maintainer__ = "Greg Caporaso"
__email__ = "gregcaporaso@gmail.com"
__status__ = "Release"
"""Makes sample x OTU table from OTU map and taxonomy.
Assumes that in the OTU map, the ids are in the format lib_seq, e.g.
M3FclSwb_1023. Will not work if this assumption is not met. Splits on last
underscore only so should be relatively robust to underscore in sample id.
"""
from collections import defaultdict
from string import strip
from numpy import array
from cogent.util.misc import flatten, InverseDict
from numpy import zeros
from qiime.format import format_otu_table
def libs_from_seqids(seq_ids, delim='_'):
    """Returns the set of library (sample) ids found in seq_ids.

    Each sequence id is split once, at the last occurrence of delim, and the
    part before it is taken as the library id. An id that does not contain
    delim is used whole.
    """
    return set(seq_id.rsplit(delim, 1)[0] for seq_id in seq_ids)
def seqids_from_otu_to_seqid(otu_to_seqid):
    """Returns the set of all seq ids that appear as values of otu_to_seqid.

    The per-OTU collections of seq ids are combined with flatten and
    duplicates are dropped by converting the result to a set.
    """
    per_otu_seqids = otu_to_seqid.values()
    all_seqids = flatten(per_otu_seqids)
    return set(all_seqids)
def make_otu_table(otu_to_seqid, otu_to_taxonomy=None, delim='_', legacy=True):
    """Makes OTU table from otu_to_seqid and otu_to_taxonomy maps.

    otu_to_seqid: dict mapping each OTU id to the collection of sequence ids
        assigned to it. Every sequence id is split at the last occurrence of
        delim; the leading part is used as the library (sample) id.
    otu_to_taxonomy: optional dict mapping OTU id to its taxonomy entry.
        OTUs missing from it get the string 'None'. When it is empty or
        None, no taxonomy is passed on.
    delim: delimiter between the library id and the per-sequence suffix.
    legacy: passed through unchanged to format_otu_table.

    Returns the result of format_otu_table called with the sorted library
    ids, the sorted OTU ids, the OTU x library count matrix and the taxonomy.

    Raises MemoryError (after writing a hint to stderr) when the count
    matrix cannot be allocated.
    """
    # Imported locally: the MemoryError handler needs stderr and the
    # module-level imports do not provide it.
    from sys import stderr

    all_seqs = seqids_from_otu_to_seqid(otu_to_seqid)
    try:
        # Order numerically when every id is integer-like, but keep the
        # original keys so they can still be looked up in the input maps
        # (converting through int and back would turn '007' into '7').
        all_otus = sorted(otu_to_seqid.keys(), key=int)
    except ValueError:
        all_otus = sorted(otu_to_seqid.keys())

    # Use the caller's delimiter here too, so the library ids computed in
    # the counting loop below are guaranteed to be present in all_libs.
    all_libs = sorted(libs_from_seqids(all_seqs, delim))

    try:
        table = zeros((len(all_otus), len(all_libs)), int)
    except MemoryError:
        stderr.write('memory error, check format of input otu file\n')
        stderr.write('are there really %s otus and %s samples?\n' %
                     (len(all_otus), len(all_libs)))
        stderr.write('traceback follows:\n')
        # Bare raise keeps the original traceback intact.
        raise

    # Map each library id to its column once instead of searching the list
    # for every sequence.
    lib_to_col = dict((lib, col) for col, lib in enumerate(all_libs))
    for row_idx, otu_id in enumerate(all_otus):
        row = table[row_idx]
        for seq_id in otu_to_seqid[otu_id]:
            lib = seq_id.rsplit(delim, 1)[0]
            row[lib_to_col[lib]] += 1

    if otu_to_taxonomy:
        taxonomy = [otu_to_taxonomy.get(o, 'None') for o in all_otus]
    else:
        taxonomy = None

    return format_otu_table(all_libs, all_otus, table, taxonomy, legacy=legacy)
def remove_otus(otu_to_seqid, otus_to_exclude):
    """Remove otus_to_exclude from otu map.

    otu_to_seqid: dict keyed by OTU id; it is modified in place.
    otus_to_exclude: iterable of strings. The first whitespace-delimited
        field of each string is taken as the OTU id to drop; blank or
        whitespace-only strings are ignored.

    Returns the same otu_to_seqid object with the excluded OTUs deleted.
    """
    exclude_ids = set()
    for entry in otus_to_exclude:
        fields = entry.split()
        # A blank line has no fields and therefore names no OTU.
        if fields:
            exclude_ids.add(fields[0])

    # Iterate over a snapshot of the keys so that deleting entries never
    # disturbs the iteration.
    for otu_id in list(otu_to_seqid.keys()):
        if otu_id in exclude_ids:
            del otu_to_seqid[otu_id]
    return otu_to_seqid