This repository has been archived by the owner on Nov 9, 2023. It is now read-only.
/
ampliconnoise.py
executable file
·176 lines (148 loc) · 7.15 KB
/
ampliconnoise.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
#!/usr/bin/env python
# File created on 20 Jun 2011
from __future__ import division
__author__ = "Justin Kuczynski"
__copyright__ = "Copyright 2011, The QIIME Project"
__credits__ = ["Justin Kuczynski","Jose A. Navas Molina"]
__license__ = "GPL"
__version__ = "1.8.0"
__maintainer__ = "Justin Kuczynski"
__email__ = "justinak@gmail.com"
from qiime.util import parse_command_line_parameters, get_options_lookup
from qiime.util import make_option
from os import makedirs
from qiime.util import load_qiime_config
from qiime.parse import parse_qiime_parameters
from qiime.workflow.util import (print_commands,
call_commands_serially,
print_to_stdout,
no_status_updates,
validate_and_set_jobs_to_start)
from qiime.workflow.ampliconnoise import run_ampliconnoise
import os
# Module-level configuration, built once at import time: the QIIME config,
# the shared option lookup table, and the script_info dictionary that main()
# expands into keyword arguments for parse_command_line_parameters().
qiime_config = load_qiime_config()
options_lookup = get_options_lookup()

script_info = {
    'brief_description': "Run AmpliconNoise",
    # The description is kept flush-left because its text is emitted verbatim.
    'script_description': """
The steps performed by this script are:
1. Split input sff.txt file into one file per sample
2. Run scripts required for PyroNoise
3. Run scripts required for SeqNoise
4. Run scripts requred for Perseus (chimera removal)
5. Merge output files into one file similar to the output of split_libraries.py
This script produces a denoised fasta sequence file such as:
>PC.355_41
CATGCTGCCTC...
...
>PC.636_23
CATGCTGCCTC...
...
Additionally, the intermediate results of the ampliconnoise pipeline are
written to an output directory.
Ampliconnoise must be installed and correctly configured, and parallelized
steps will be called with mpirun, not qiime's start_parallel_jobs_torque.py script.
""",
    # Each usage entry is a (title, description, example command) tuple.
    'script_usage': [
        ("",
         "Run ampliconnoise, write output to anoise_out.fna, compatible with output of split_libraries.py",
         "%prog -i Fasting_Example.sff.txt -m Fasting_Map.txt -o anoise_out.fna"),
    ],
    'output_description': "a fasta file of sequences, with labels as:'>sample1_0' , '>sample1_1' ...",
    'required_options': [
        options_lookup['mapping_fp'],
        make_option('-i', '--sff_filepath',
                    type='existing_filepath',
                    help='sff.txt filepath'),
        make_option('-o', '--output_filepath',
                    type='new_filepath',
                    help='the output file'),
    ],
    'optional_options': [
        make_option('-n', '--np',
                    type='int',
                    default=2,
                    help='number of processes to use for mpi steps. Default: %default'),
        make_option('--chimera_alpha',
                    type='float',
                    default=-3.8228,
                    help='alpha value to Class.pl used for chimera removal Default: %default'),
        make_option('--chimera_beta',
                    type='float',
                    default=0.6200,
                    help='beta value to Class.pl used for chimera removal Default: %default'),
        make_option('--seqnoise_resolution',
                    type='string',
                    default=None,
                    help='-s parameter passed to seqnoise. Default is 25.0 for titanium, 30.0 for flx'),
        make_option('-d', '--output_dir',
                    type='new_dirpath',
                    default=None,
                    help='directory for ampliconnoise intermediate results. Default is output_filepath_dir'),
        make_option('-p', '--parameter_fp',
                    type='existing_filepath',
                    help=('path to the parameter file, which specifies changes'
                          ' to the default behavior. '
                          'See http://www.qiime.org/documentation/file_formats.html#qiime-parameters.'
                          ' [if omitted, default values will be used]')),
        make_option('-f', '--force',
                    action='store_true',
                    dest='force',
                    default=False,
                    help=('Force overwrite of existing output directory'
                          ' (note: existing files in output_dir will not be removed)'
                          ' [default: %default]')),
        make_option('-w', '--print_only',
                    action='store_true',
                    dest='print_only',
                    default=False,
                    help=("Print the commands but don't call them -- "
                          "useful for debugging [default: %default]")),
        make_option('--suppress_perseus',
                    action='store_true',
                    default=False,
                    help='omit perseus from ampliconnoise workflow'),
        make_option('--platform',
                    type='choice',
                    choices=['titanium', 'flx'],
                    default='flx',
                    help="sequencing technology, options are 'titanium','flx'. [default: %default]"),
        make_option('--truncate_len',
                    type='int',
                    default=None,
                    help="Specify a truncation length for ampliconnoise. Note that is this is not specified, the truncate length is chosen by the --platform option (220 for FLX, 400 for Titanium) [default: %default]"),
    ],
    'version': __version__,
}
# File name that PYRO_LOOKUP_FILE is re-pointed at for each --platform value.
_PYRO_LOOKUP_FILENAMES = {
    'flx': 'LookUp_E123.dat',
    'titanium': 'LookUp_Titanium.dat',
}


def _load_params(parameter_fp):
    """Return the parsed QIIME parameters for parameter_fp.

    When parameter_fp is empty/None, parse_qiime_parameters is called on an
    empty list instead of a file.

    Raises IOError (with a user-oriented message) if the file cannot be
    opened.
    """
    if not parameter_fp:
        # empty list returns empty defaultdict for now
        return parse_qiime_parameters([])
    try:
        # 'U' (universal newlines) is kept from the original Python 2 code.
        parameter_f = open(parameter_fp, 'U')
    except IOError:
        raise IOError(
            "Can't open parameters file (%s). Does it exist? Do you have read access?"
            % parameter_fp)
    try:
        return parse_qiime_parameters(parameter_f)
    finally:
        # Close the handle even if parsing raises.
        parameter_f.close()


def _set_pyro_lookup_file(platform):
    """Re-point the PYRO_LOOKUP_FILE environment variable for platform.

    The directory of the current PYRO_LOOKUP_FILE value is kept; only the
    file name is replaced with the one registered for the platform.

    Raises RuntimeError if the platform is unknown or if PYRO_LOOKUP_FILE is
    not set in the environment.
    """
    try:
        lookup_filename = _PYRO_LOOKUP_FILENAMES[platform]
    except KeyError:
        raise RuntimeError(
            'could not find PYRO_LOOKUP_FILE for platform ' + platform)
    existing_pyro_fp = os.environ.get('PYRO_LOOKUP_FILE')
    if existing_pyro_fp is None:
        # Fail with an actionable message rather than a bare KeyError.
        raise RuntimeError(
            'The PYRO_LOOKUP_FILE environment variable is not set. '
            'Ampliconnoise must be installed and correctly configured.')
    os.environ['PYRO_LOOKUP_FILE'] = os.path.join(
        os.path.dirname(existing_pyro_fp), lookup_filename)


def main():
    """Parse the command line and hand the options to run_ampliconnoise.

    Steps performed here before the workflow call:
      * default output_dir to output_filepath + '_dir' when not given;
      * load the optional parameters file;
      * create the output directory (an existing directory is accepted only
        with --force);
      * choose the command handler (print only vs. call serially) and the
        status callback (stdout when verbose, otherwise none);
      * re-point PYRO_LOOKUP_FILE for the selected platform;
      * convert truncate_len to a string, or None when unset.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    if opts.output_dir is None:
        opts.output_dir = opts.output_filepath + '_dir'

    params = _load_params(opts.parameter_fp)

    try:
        makedirs(opts.output_dir)
    except OSError:
        if not os.path.isdir(opts.output_dir):
            # The directory is not there, so creation failed for some other
            # reason (permissions, bad path, ...): surface the real error
            # instead of reporting that the directory already exists.
            raise
        if not opts.force:
            # Since the analysis can take quite a while, I put this check
            # in to help users avoid overwriting previous output.
            option_parser.error("Output directory already exists. Please choose"
                                " a different directory, or force overwrite with -f.")

    if opts.print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially

    if opts.verbose:
        status_update_callback = print_to_stdout
    else:
        status_update_callback = no_status_updates

    # set env variable
    _set_pyro_lookup_file(opts.platform)

    if opts.truncate_len:
        try:
            truncate_len = str(int(opts.truncate_len))
        except ValueError:
            raise ValueError("If specified, truncate_len must be int type.")
    else:
        truncate_len = None

    run_ampliconnoise(
        mapping_fp=opts.mapping_fp,
        output_dir=os.path.abspath(opts.output_dir),
        command_handler=command_handler,
        params=params,
        qiime_config=qiime_config,
        status_update_callback=status_update_callback,
        chimera_alpha=opts.chimera_alpha,
        chimera_beta=opts.chimera_beta,
        sff_txt_fp=opts.sff_filepath,
        numnodes=opts.np,
        suppress_perseus=opts.suppress_perseus,
        output_filepath=os.path.abspath(opts.output_filepath),
        platform=opts.platform,
        seqnoise_resolution=opts.seqnoise_resolution,
        truncate_len=truncate_len
    )


if __name__ == "__main__":
    main()