This repository has been archived by the owner on Nov 9, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 269
/
parallel_pick_otus_usearch61_ref.py
executable file
·138 lines (108 loc) · 6.19 KB
/
parallel_pick_otus_usearch61_ref.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
#!/usr/bin/env python
# File created on 07 Jul 2012
from __future__ import division
__author__ = "Greg Caporaso"
__copyright__ = "Copyright 2011, The QIIME project"
__credits__ = ["Greg Caporaso"]
__license__ = "GPL"
__version__ = "1.7.0"
__maintainer__ = "Greg Caporaso"
__email__ = "gregcaporaso@gmail.com"
__status__ = "Release"
from qiime.util import (parse_command_line_parameters,
get_options_lookup,
make_option)
from qiime.parallel.pick_otus import ParallelPickOtusUsearch61Ref
############################
# Script functionality

# Shared option definitions provided by qiime.util, indexed by option name.
options_lookup = get_options_lookup()

# Keyword arguments handed to parse_command_line_parameters() in main():
# descriptions, usage examples, and the required/optional option lists.
script_info = {}
script_info['brief_description'] = """Parallel pick otus using usearch61_ref"""
script_info['script_description'] = """This script works like the pick_otus.py script, but is intended to make use of multicore/multiprocessor environments to perform analyses in parallel."""

# Each usage entry is a (title, description, command) tuple. The file names
# in the description are kept in sync with the ones used in the command.
script_info['script_usage'] = []
script_info['script_usage'].append((
    """Example""",
    """Pick OTUs by searching $PWD/seqs.fna against $PWD/refseqs.fna with reference-based usearch and write the output to the $PWD/usearch_ref_otus/ directory. This is a closed-reference OTU picking process. ALWAYS SPECIFY ABSOLUTE FILE PATHS (absolute path represented here as $PWD, but will generally look something like /home/ubuntu/my_analysis/).""",
    """%prog -i $PWD/seqs.fna -r $PWD/refseqs.fna -o $PWD/usearch_ref_otus/"""))
script_info['output_description'] = """"""

script_info['required_options'] = [
    make_option('-i', '--input_fasta_fp', action='store',
                type='existing_filepath',
                help='full path to '
                     'input_fasta_fp'),
    make_option('-o', '--output_dir', action='store',
                type='new_dirpath', help='path to store output files'),
    make_option('-r', '--refseqs_fp', action='store',
                type='existing_filepath',
                help='full path to '
                     'reference collection'),
]

script_info['optional_options'] = [
    make_option('-s', '--similarity', action='store',
                type='float',
                help='Sequence similarity '
                     'threshold [default: %default]',
                default=0.97),
    make_option('-z', '--enable_rev_strand_match', action='store_true',
                default=False,
                help=('Enable reverse strand matching for uclust, uclust_ref, '
                      'usearch, usearch_ref, usearch61, or usearch61_ref otu picking, '
                      'will double the amount of memory used. [default: %default]')),
    # max_accepts, max_rejects and word_length use the string 'default' as
    # their default value; no optparse type is declared for them here.
    make_option('--max_accepts', default='default',
                help="max_accepts value to uclust, uclust_ref, usearch61, and "
                     "usearch61_ref.  By default, will use value suggested by "
                     "method (uclust: 20, usearch61: 1) [default: %default]"),
    make_option('--max_rejects', default='default',
                help="max_rejects value for uclust, uclust_ref, usearch61, and "
                     "usearch61_ref.  With default settings, will use value "
                     "recommended by clustering method used "
                     "(uclust: 500, usearch61: 8 for usearch_fast_cluster option,"
                     " 32 for reference and smallmem options) "
                     "[default: %default]"),
    make_option('--word_length', default='default',
                help="word length value for uclust, uclust_ref, and "
                     "usearch, usearch_ref, usearch61, and usearch61_ref. "
                     "With default setting, will use the setting recommended by "
                     "the method (uclust: 12, usearch: 64, usearch61: 8).  int "
                     "value can be supplied to override this setting. "
                     "[default: %default]"),
    make_option('--minlen', default=64,
                help=("Minimum length of sequence "
                      "allowed for usearch, usearch_ref, usearch61, and "
                      "usearch61_ref. [default: %default]"),
                type='int'),
    make_option('--usearch_fast_cluster', default=False,
                help=("Use fast "
                      "clustering option for usearch or usearch61_ref with new "
                      "clusters.  --enable_rev_strand_match can not be enabled "
                      "with this option, and the only valid option for "
                      "usearch61_sort_method is 'length'.  This option uses more "
                      "memory than the default option for de novo clustering."
                      " [default: %default]"),
                action='store_true'),
    make_option('--usearch61_sort_method', default='abundance',
                help=("Sorting method for usearch61 and usearch61_ref.  Valid "
                      "options are abundance, length, or None.  If the "
                      "--usearch_fast_cluster option is enabled, the only sorting "
                      "method allowed is length. [default: %default]"),
                type='str'),
    make_option('--sizeorder', default=False,
                help=("Enable size based preference in clustering with usearch61. "
                      "Requires that --usearch61_sort_method be abundance. "
                      "[default: %default]"),
                action='store_true'),
    # Options shared with other scripts, taken from the common lookup.
    options_lookup['jobs_to_start'],
    options_lookup['retain_temp_files'],
    options_lookup['suppress_submit_jobs'],
    options_lookup['poll_directly'],
    options_lookup['cluster_jobs_fp'],
    options_lookup['suppress_polling'],
    options_lookup['job_prefix'],
    options_lookup['seconds_to_sleep'],
]
script_info['version'] = __version__
def main():
    """Parse the command line and run the parallel usearch61_ref OTU picker.

    Options are parsed according to the module-level ``script_info``. A
    ``ParallelPickOtusUsearch61Ref`` runner is built from the job-control
    options and then called with the input FASTA path, the output
    directory, and a dict holding every parsed option.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Create dict of command-line options by copying the parsed attributes
    # directly. The previous eval(str(opts)) round trip evaluated text built
    # from user-supplied arguments and failed for any value whose repr is
    # not a valid Python expression.
    # NOTE(review): assumes opts is an optparse.Values-style object whose
    # attributes are the parsed options -- confirm against
    # parse_command_line_parameters.
    params = dict(vars(opts))

    parallel_runner = ParallelPickOtusUsearch61Ref(
        cluster_jobs_fp=opts.cluster_jobs_fp,
        jobs_to_start=opts.jobs_to_start,
        retain_temp_files=opts.retain_temp_files,
        suppress_polling=opts.suppress_polling,
        seconds_to_sleep=opts.seconds_to_sleep)

    # Pass the user's suppress_submit_jobs choice through; it is registered
    # in script_info['optional_options'] but was previously overridden by a
    # hard-coded False, so the flag had no effect.
    parallel_runner(opts.input_fasta_fp,
                    opts.output_dir,
                    params,
                    job_prefix=opts.job_prefix,
                    poll_directly=opts.poll_directly,
                    suppress_submit_jobs=opts.suppress_submit_jobs)
# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()