Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Fetching contributors…

Cannot retrieve contributors at this time

184 lines (164 sloc) 8.64 kB
# Copyright 2009 by Osvaldo Zagordi. All rights reserved.
# Revisions copyright 2010 by Peter Cock.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
"""Command line wrapper for the short read aligner Novoalign by Novocraft."""
import types
from Bio.Application import _Option, AbstractCommandline
class NovoalignCommandline(AbstractCommandline):
    """Command line wrapper for novoalign by Novocraft.

    See www.novocraft.com - novoalign is a short read alignment program.

    Example:

    >>> from Bio.Sequencing.Applications import NovoalignCommandline
    >>> novoalign_cline = NovoalignCommandline(database='some_db',
    ...                                        readfile='some_seq.txt')
    >>> print(novoalign_cline)
    novoalign -d some_db -f some_seq.txt

    As with all the Biopython application wrappers, you can also add or
    change options after creating the object:

    >>> novoalign_cline.format = 'PRBnSEQ'
    >>> novoalign_cline.r_method='0.99' # limited valid values
    >>> novoalign_cline.fragment = '250 20' # must be given as a string
    >>> novoalign_cline.miRNA = 100
    >>> print(novoalign_cline)
    novoalign -d some_db -f some_seq.txt -F PRBnSEQ -r 0.99 -i 250 20 -m 100

    You would typically run the command line with novoalign_cline() or via
    the Python subprocess module, as described in the Biopython tutorial.

    Last checked against version: 2.05.04
    """

    def __init__(self, cmd="novoalign", **kwargs):
        """Declare the supported novoalign options and initialise the wrapper.

        Arguments:
         - cmd - name of (or path to) the executable; forwarded unchanged to
           AbstractCommandline.__init__ (default "novoalign").
         - kwargs - forwarded unchanged to AbstractCommandline.__init__; the
           docstring example passes option values by their long names
           (e.g. database='some_db').
        """
        READ_FORMAT = ['FA', 'SLXFQ', 'STDFQ', 'ILMFQ', 'PRB', 'PRBnSEQ']
        REPORT_FORMAT = ['Native', 'Pairwise', 'SAM']
        REPEAT_METHOD = ['None', 'Random', 'All', 'Exhaustive', '0.99']

        # The builtins int/str are used instead of types.IntType and
        # types.StringType: they are the same objects on Python 2, while the
        # types.* aliases no longer exist on Python 3.
        def is_int(value):
            """Return True if value is an int."""
            return isinstance(value, int)

        def is_str(value):
            """Return True if value is a str."""
            return isinstance(value, str)

        # self.parameters must be set before the base class __init__ runs
        # (same ordering as the original code).
        self.parameters = [
            _Option(["-d", "database"],
                    "database filename",
                    filename=True,
                    equate=False),
            _Option(["-f", "readfile"],
                    "read file",
                    filename=True,
                    equate=False),
            _Option(["-F", "format"],
                    "Format of read files.\n\nAllowed values: %s"
                    % ", ".join(READ_FORMAT),
                    checker_function=lambda x: x in READ_FORMAT,
                    equate=False),

            # Alignment scoring options
            _Option(["-t", "threshold"],
                    "Threshold for alignment score",
                    checker_function=is_int,
                    equate=False),
            _Option(["-g", "gap_open"],
                    "Gap opening penalty [default: 40]",
                    checker_function=is_int,
                    equate=False),
            _Option(["-x", "gap_extend"],
                    "Gap extend penalty [default: 15]",
                    checker_function=is_int,
                    equate=False),
            _Option(["-u", "unconverted"],
                    "Experimental: unconverted cytosines penalty in bisulfite mode\n\n"
                    "Default: no penalty",
                    checker_function=is_int,
                    equate=False),

            # Quality control and read filtering
            _Option(["-l", "good_bases"],
                    "Minimum number of good quality bases [default: log(N_g, 4) + 5]",
                    checker_function=is_int,
                    equate=False),
            _Option(["-h", "homopolymer"],
                    "Homopolymer read filter [default: 20; disable: negative value]",
                    checker_function=is_int,
                    equate=False),

            # Read preprocessing options
            _Option(["-a", "adapter3"],
                    "Strips a 3' adapter sequence prior to alignment.\n\n"
                    "With paired ends two adapters can be specified",
                    checker_function=is_str,
                    equate=False),
            _Option(["-n", "truncate"],
                    "Truncate to specific length before alignment",
                    checker_function=is_int,
                    equate=False),
            _Option(["-s", "trimming"],
                    "If fail to align, trim by s bases until they map or become shorter than l.\n\n"
                    "Default: 2",
                    checker_function=is_int,
                    equate=False),
            _Option(["-5", "adapter5"],
                    "Strips a 5' adapter sequence.\n\n"
                    "Similar to -a (adaptor3), but on the 5' end.",
                    checker_function=is_str,
                    equate=False),

            # Reporting options
            _Option(["-o", "report"],
                    "Specifies the report format.\n\nAllowed values: %s\nDefault: Native"
                    % ", ".join(REPORT_FORMAT),
                    checker_function=lambda x: x in REPORT_FORMAT,
                    equate=False),
            _Option(["-Q", "quality"],
                    "Lower threshold for an alignment to be reported [default: 0]",
                    checker_function=is_int,
                    equate=False),
            _Option(["-R", "repeats"],
                    "If score difference is higher, report repeats.\n\n"
                    "Otherwise -r read method applies [default: 5]",
                    checker_function=is_int,
                    equate=False),
            _Option(["-r", "r_method"],
                    "Methods to report reads with multiple matches.\n\n"
                    "Allowed values: %s\n"
                    "'All' and 'Exhaustive' accept limits."
                    % ", ".join(REPEAT_METHOD),
                    # Only the first whitespace-separated token is validated,
                    # so a trailing limit (e.g. "All 10") is accepted.
                    checker_function=lambda x: x.split()[0] in REPEAT_METHOD,
                    equate=False),
            _Option(["-e", "recorded"],
                    "Alignments recorded with score equal to the best.\n\n"
                    "Default: 1000 in default read method, otherwise no limit.",
                    checker_function=is_int,
                    equate=False),
            _Option(["-q", "qual_digits"],
                    "Decimal digits for quality scores [default: 0]",
                    checker_function=is_int,
                    equate=False),

            # Paired end options
            _Option(["-i", "fragment"],
                    "Fragment length (2 reads + insert) and standard deviation [default: 250 30]",
                    # Value must be a string holding exactly two
                    # whitespace-separated fields, e.g. '250 20'.
                    checker_function=lambda x: len(x.split()) == 2,
                    equate=False),
            _Option(["-v", "variation"],
                    "Structural variation penalty [default: 70]",
                    checker_function=is_int,
                    equate=False),

            # miRNA mode
            _Option(["-m", "miRNA"],
                    "Sets miRNA mode and optionally sets a value for the region scanned [default: off]",
                    checker_function=is_int,
                    equate=False),

            # Multithreading
            _Option(["-c", "cores"],
                    "Number of threads, disabled on free versions [default: number of cores]",
                    checker_function=is_int,
                    equate=False),

            # Quality calibrations
            _Option(["-k", "read_cal"],
                    "Read quality calibration from file (mismatch counts)",
                    checker_function=is_str,
                    equate=False),
            _Option(["-K", "write_cal"],
                    "Accumulate mismatch counts and write to file",
                    checker_function=is_str,
                    equate=False),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
def _test():
"""Run the module's doctests (PRIVATE)."""
print "Running Novoalign doctests..."
import doctest
doctest.testmod()
print "Done"
if __name__ == "__main__":
_test()
Jump to Line
Something went wrong with that request. Please try again.