This repository has been archived by the owner on Nov 9, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 269
/
denoise_wrapper.py
70 lines (55 loc) · 2.1 KB
/
denoise_wrapper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#!/usr/bin/env python
from __future__ import division
"""Functions supporting the QIIME denoiser"""
__author__ = "Jens Reeder"
__copyright__ = "Copyright 2011, The QIIME Project"
__credits__ = ["Jens Reeder", "Greg Caporaso"]
__license__ = "GPL"
__version__ = "1.9.0"
__maintainer__ = "Justin Kuczynski"
__email__ = "justinak@gmail.com"
from os import system, listdir, remove, rmdir
from os.path import exists, split
from re import search
from itertools import chain
from skbio.parse.sequences import parse_fasta
from bfillings.denoiser import lazy_parse_sff_handle
from burrito.util import ApplicationNotFoundError, ApplicationError
from qiime.util import load_qiime_config
from qiime.denoiser.flowgram_clustering import denoise_seqs
def fast_denoiser(
sff_fps, fasta_fp, tmp_outdir, num_cpus, primer, verbose=True,
titanium=False):
"""wrapper function calling methods from the Denoiser package."""
if num_cpus > 1:
denoise_seqs(sff_fps, fasta_fp, tmp_outdir,
primer=primer, cluster=True, num_cpus=num_cpus,
verbose=verbose, titanium=titanium)
else:
denoise_seqs(sff_fps, fasta_fp, tmp_outdir, primer=primer,
verbose=verbose, titanium=titanium)
# read centroids and singletons
centroids = parse_fasta(open(tmp_outdir + "/centroids.fasta"))
singletons = parse_fasta(open(tmp_outdir + "/singletons.fasta"))
seqs = chain(centroids, singletons)
# read mapping
mapping = {}
cluster_mapping = open(tmp_outdir + "/denoiser_mapping.txt")
for i, cluster in enumerate(cluster_mapping):
cluster, members = cluster.split(':')
members = members.split()
clust = [cluster]
clust.extend(members)
mapping[i] = clust
return seqs, mapping
def extract_cluster_size(line):
""" extract the size of a cluster from the header line.
line is expected to be of this format:
>GCC6FHY01EQVIC | cluster size: 5
"""
cluster_size = line.split(":")[-1]
try:
cluster_size = int(cluster_size)
except ValueError:
return 0
return cluster_size