Skip to content

Commit

Permalink
multithread option added to mergepairs
Browse files Browse the repository at this point in the history
  • Loading branch information
davidealbanese committed Apr 20, 2018
1 parent c471e79 commit 8a24699
Show file tree
Hide file tree
Showing 7 changed files with 30 additions and 23 deletions.
3 changes: 2 additions & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ processing of amplicon sequencing data, **from raw sequences** to
**OTU tables**, **taxonomy classification** and **phylogenetic tree**
inference. The pipeline can be applied to a range of highly conserved
genes/spacers, such as **16S rRNA gene**, **Internal Transcribed
Spacer (ITS)** and **28S rRNA**. micca is an open-source, GPLv3-licensed
Spacer (ITS)**, **18S** and **28S rRNA**. micca is an open-source, GPLv3-licensed
software.

* `Homepage <http://micca.org/>`_
Expand All @@ -23,6 +23,7 @@ Key features:
Torrent) and **overlapping paired-end** reads (Illumina MiSeq/HiSeq);
* **multithread** de novo greedy, closed-reference, open-reference and swarm OTU
picking protocols;
* multithread denoising of Illumina reads;
* **state-of-the-art taxonomic classification** algorithms (RDP and
consensus-based classifier);
* fast and memory-efficient **NAST** multiple sequence alignment (MSA);
Expand Down
8 changes: 5 additions & 3 deletions micca/api/_mergepairs.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@

def mergepairs(input_fns, output_fn, reverse_fn=None, notmerged_fwd_fn=None,
notmerged_rev_fn=None, minovlen=32, maxdiffs=8, pattern="_R1",
repl="_R2", sep="_", nostagger=False):
repl="_R2", sep="_", nostagger=False, threads=1):

if not isinstance(input_fns, list):
raise ValueError("input_fns must be of type list")
Expand Down Expand Up @@ -60,7 +60,8 @@ def mergepairs(input_fns, output_fn, reverse_fn=None, notmerged_fwd_fn=None,
fastq_minovlen=minovlen,
fastq_maxdiffs=maxdiffs,
fastq_allowmergestagger=not nostagger,
fastq_nostagger=nostagger)
fastq_nostagger=nostagger,
threads=threads)
else:
# output directory for temp files
output_dir = os.path.dirname(output_fn)
Expand Down Expand Up @@ -112,7 +113,8 @@ def mergepairs(input_fns, output_fn, reverse_fn=None, notmerged_fwd_fn=None,
fastq_minovlen=minovlen,
fastq_maxdiffs=maxdiffs,
fastq_allowmergestagger=not nostagger,
fastq_nostagger=nostagger)
fastq_nostagger=nostagger,
threads=threads)
except micca.tp.vsearch.VSEARCHError as err:
warnings.warn("{}: VSEARCH error: {}, SKIP\n"
.format(input_fn_base, err))
Expand Down
6 changes: 5 additions & 1 deletion micca/cmds/mergepairs.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,9 @@ def main(argv):
help="write not merged forward reads.")
group.add_argument('--notmerged-rev', metavar="FILE",
help="write not merged reverse reads.")
group.add_argument('-t', '--threads', default=1, type=int,
help="number of threads to use (1 to 256, default "
"%(default)s).")

args = parser.parse_args(argv)

Expand All @@ -147,7 +150,8 @@ def main(argv):
pattern=args.pattern,
repl=args.repl,
sep=args.sep,
nostagger=args.nostagger)
nostagger=args.nostagger,
threads=args.threads)
except Exception as err:
sys.stderr.write("Error: {}\n".format(err))
sys.exit(1)
24 changes: 12 additions & 12 deletions micca/cmds/otu.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,20 +150,20 @@ def main(argv):
help="number of threads to use (1 to 256, default "
"%(default)s).")
group.add_argument('-g', '--greedy', default="dgc", choices=["dgc", "agc"],
help="greedy clustering strategy, distance (DGC) or "
"abundance-based (AGC) (for 'denovo_greedy' and "
"'open_ref' clustering methods) (default %(default)s).")
help="greedy clustering strategy, distance (DGC) or "
"abundance-based (AGC) (for 'denovo_greedy' and "
"'open_ref' clustering methods) (default %(default)s).")
group.add_argument('-s', '--minsize', type=int,
help="discard sequences with an abundance value "
"smaller than MINSIZE after dereplication (>=1, "
"default values are 2 for 'denovo_greedy' and "
"'open_ref', 1 for 'denovo_swarm' and 8 for "
"'denovo_unoise').")
help="discard sequences with an abundance value "
"smaller than MINSIZE after dereplication (>=1, "
"default values are 2 for 'denovo_greedy' and "
"'open_ref', 1 for 'denovo_swarm' and 8 for "
"'denovo_unoise').")
group.add_argument('-a', '--strand', default="both",
choices=["both", "plus"],
help="search both strands or the plus strand only "
"(for 'closed_ref' and 'open_ref' clustering methods, "
"default %(default)s).")
choices=["both", "plus"],
help="search both strands or the plus strand only "
"(for 'closed_ref' and 'open_ref' clustering methods, "
"default %(default)s).")

# chimeras
group_chim = parser.add_argument_group("Chimera removal specific options")
Expand Down
5 changes: 3 additions & 2 deletions micca/tp/vsearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,11 +227,12 @@ def fastq_mergepairs(forward_fn, reverse_fn, fastaout_fn=None, fastqout_fn=None,
fastaout_notmerged_fwd_fn=None, fastaout_notmerged_rev_fn=None,
fastqout_notmerged_fwd_fn=None, fastqout_notmerged_rev_fn=None,
fastq_minovlen=10, fastq_maxdiffs=5,
fastq_allowmergestagger=False, fastq_nostagger=True):
fastq_allowmergestagger=False, fastq_nostagger=True,
threads=1):

params = ["--fastq_mergepairs", forward_fn, "--reverse", reverse_fn,
"--fastq_maxdiffs", str(fastq_maxdiffs), "--fastq_minovlen",
str(fastq_minovlen)]
str(fastq_minovlen), "--threads", str(threads)]

if fastaout_fn is not None:
params.extend(["--fastaout", fastaout_fn])
Expand Down
6 changes: 3 additions & 3 deletions scripts/micca
Original file line number Diff line number Diff line change
Expand Up @@ -69,12 +69,12 @@ def main():
stats Report sequences stats
filterstats Report sequences stats relative to quality filtering
filter Filter sequences according to the expected error rate %%
otu Assign similar sequences to OTUs (OTUs)
otu Assign similar sequences to OTUs or SVs
classify Assign taxonomy
msa Multiple sequence alignment (MSA)
tree Infer phylogenetic trees from alignments
tree Infer phylogenetic trees from multiple alignments
root Reroot phylogenetic trees
tobiom Convert micca output files into the BIOM format
tobiom Convert micca output files into the BIOM (v1.0) format
tablestats Report OTU table summaries
tablerare Rarefy OTU tables
tabletotax Build tables for each taxonomic level
Expand Down
1 change: 0 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
'Intended Audience :: Science/Research',
'License :: OSI Approved :: GNU General Public License (GPL)',
'Programming Language :: Python :: 2',
'Programming Language :: Python :: 3',
'Programming Language :: C',
'Programming Language :: C++',
'Topic :: Scientific/Engineering :: Bio-Informatics',
Expand Down

0 comments on commit 8a24699

Please sign in to comment.