This repository has been archived by the owner on Nov 9, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 269
/
pick_de_novo_otus.py
executable file
·135 lines (115 loc) · 6.19 KB
/
pick_de_novo_otus.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
#!/usr/bin/env python
# File created on 27 Oct 2009.
from __future__ import division
__author__ = "Greg Caporaso"
__copyright__ = "Copyright 2011, The QIIME Project"
__credits__ = ["Greg Caporaso", "Kyle Bittinger"]
__license__ = "GPL"
__version__ = "1.7.0"
__maintainer__ = "Greg Caporaso"
__email__ = "gregcaporaso@gmail.com"
__status__ = "Release"
from qiime.util import make_option
from os import makedirs
from qiime.util import (load_qiime_config,
parse_command_line_parameters,
get_options_lookup)
from qiime.parse import parse_qiime_parameters
from qiime.workflow.upstream import run_pick_de_novo_otus
from qiime.workflow.util import (print_commands,
call_commands_serially,
print_to_stdout,
no_status_updates,
validate_and_set_jobs_to_start)
qiime_config = load_qiime_config()
options_lookup = get_options_lookup()
script_info={}
script_info['brief_description'] = """A workflow for de novo OTU picking, taxonomy assignment, phylogenetic tree construction, and OTU table construction."""
script_info['script_description'] = """This script takes a sequence file and performs all processing steps through building the OTU table."""
script_info['script_usage'] = []
script_info['script_usage'].append(("""Simple example""","""The following command will start an analysis on seqs.fna (-i), which is a post-split_libraries fasta file. The sequence identifiers in this file should be of the form <sample_id>_<unique_seq_id>. The following steps, corresponding to the preliminary data preparation, are applied: Pick de novo OTUs at 97%; pick a representative sequence for each OTU (the OTU centroid sequence); align the representative set with PyNAST; assign taxonomy with RDP classifier; filter the alignment prior to tree building - remove positions which are all gaps, and specified as 0 in the lanemask; build a phylogenetic tree with FastTree; build an OTU table. All output files will be written to the directory specified by -o, and subdirectories as appropriate. ALWAYS SPECIFY ABSOLUTE FILE PATHS (absolute path represented here as $PWD, but will generally look something like /home/ubuntu/my_analysis/).""","""%prog -i $PWD/seqs.fna -o $PWD/otus/"""))
script_info['script_usage_output_to_remove'] = ['$PWD/otus/']
script_info['output_description'] ="""This script will produce an OTU mapping file (pick_otus.py), a representative set of sequences (FASTA file from pick_rep_set.py), a sequence alignment file (FASTA file from align_seqs.py), taxonomy assignment file (from assign_taxonomy.py), a filtered sequence alignment (from filter_alignment.py), a phylogenetic tree (Newick file from make_phylogeny.py) and a biom-formatted OTU table (from make_otu_table.py)."""
script_info['required_options'] = [
make_option('-i','--input_fp',type='existing_filepath',
help='the input fasta file [REQUIRED]'),
make_option('-o','--output_dir',type='new_dirpath',
help='the output directory [REQUIRED]'),
]
script_info['optional_options'] = [\
make_option('-p','--parameter_fp',type='existing_filepath',
help='path to the parameter file, which specifies changes'+\
' to the default behavior. '+\
'See http://www.qiime.org/documentation/file_formats.html#qiime-parameters .'+\
' [if omitted, default values will be used]'),
make_option('-f','--force',action='store_true',\
dest='force',help='Force overwrite of existing output directory'+\
' (note: existing files in output_dir will not be removed)'+\
' [default: %default]'),\
make_option('-w','--print_only',action='store_true',\
dest='print_only',help='Print the commands but don\'t call them -- '+\
'useful for debugging [default: %default]',default=False),\
make_option('-a','--parallel',action='store_true',\
dest='parallel',default=False,\
help='Run in parallel where available [default: %default]'),
options_lookup['jobs_to_start_workflow']
]
script_info['version'] = __version__
def main():
option_parser, opts, args = parse_command_line_parameters(**script_info)
verbose = opts.verbose
input_fp = opts.input_fp
output_dir = opts.output_dir
verbose = opts.verbose
print_only = opts.print_only
parallel = opts.parallel
# No longer checking that jobs_to_start > 2, but
# commenting as we may change our minds about this.
#if parallel: raise_error_on_parallel_unavailable()
if opts.parameter_fp:
try:
parameter_f = open(opts.parameter_fp, 'U')
except IOError:
raise IOError,\
"Can't open parameters file (%s). Does it exist? Do you have read access?"\
% opts.parameter_fp
params = parse_qiime_parameters(parameter_f)
parameter_f.close()
else:
params = parse_qiime_parameters([])
# empty list returns empty defaultdict for now
jobs_to_start = opts.jobs_to_start
default_jobs_to_start = qiime_config['jobs_to_start']
validate_and_set_jobs_to_start(params,
jobs_to_start,
default_jobs_to_start,
parallel,
option_parser)
try:
makedirs(output_dir)
except OSError:
if opts.force:
pass
else:
# Since the analysis can take quite a while, I put this check
# in to help users avoid overwriting previous output.
option_parser.error("Output directory already exists. Please choose"
" a different directory, or force overwrite with -f.")
if print_only:
command_handler = print_commands
else:
command_handler = call_commands_serially
if verbose:
status_update_callback = print_to_stdout
else:
status_update_callback = no_status_updates
run_pick_de_novo_otus(
input_fp,
output_dir,
command_handler=command_handler,
params=params,
qiime_config=qiime_config,
parallel=parallel,\
status_update_callback=status_update_callback)
if __name__ == "__main__":
main()