From 6957dd51be97e3cc258a36ca0904d5cbfd0de328 Mon Sep 17 00:00:00 2001 From: Peter Date: Fri, 3 Dec 2010 19:02:15 +0000 Subject: [PATCH] Make name and description mandatory in app wrappers Note this changes the private API to the Bio.Application wrapper parameter classes. --- Bio/Align/Applications/_Clustalw.py | 330 +++----- Bio/Align/Applications/_Mafft.py | 148 ++-- Bio/Align/Applications/_Muscle.py | 272 +++---- Bio/Align/Applications/_Prank.py | 120 ++- Bio/Align/Applications/_Probcons.py | 57 +- Bio/Align/Applications/_TCoffee.py | 107 ++- Bio/Application/__init__.py | 18 +- Bio/Blast/Applications.py | 939 +++++++++++++--------- Bio/Emboss/Applications.py | 893 +++++++++++--------- Bio/PopGen/GenePop/Controller.py | 39 +- Bio/Sequencing/Applications/_Novoalign.py | 198 ++--- 11 files changed, 1616 insertions(+), 1505 deletions(-) diff --git a/Bio/Align/Applications/_Clustalw.py b/Bio/Align/Applications/_Clustalw.py index 0a32faf67b6..35687ad4fd5 100644 --- a/Bio/Align/Applications/_Clustalw.py +++ b/Bio/Align/Applications/_Clustalw.py @@ -38,23 +38,14 @@ def __init__(self, cmd="clustalw", **kwargs): self.parameters = \ [ _Option(["-infile", "-INFILE", "INFILE", "infile"], - ["file"], - None, - False, "Input sequences.", - True), + types=["file"]), _Option(["-profile1", "-PROFILE1", "PROFILE1", "profile1"], - ["file"], - None, - False, "Profiles (old alignment).", - True), + types=["file"]), _Option(["-profile2", "-PROFILE2", "PROFILE2", "profile2"], - ["file"], - None, - False, "Profiles (old alignment).", - True), + types=["file"]), ################## VERBS (do things) ############################# _Switch(["-options", "-OPTIONS", "OPTIONS", "options"], "List the command line parameters"), @@ -69,11 +60,8 @@ def __init__(self, cmd="clustalw", **kwargs): _Switch(["-tree", "-TREE", "TREE", "tree"], "Calculate NJ tree."), _Option(["-bootstrap", "-BOOTSTRAP", "BOOTSTRAP", "bootstrap"], - [], - lambda x: isinstance(x, int), - False, "Bootstrap a NJ tree (n= 
number of bootstraps; def. = 1000).", - True), + checker_function=lambda x: isinstance(x, int)), _Switch(["-convert", "-CONVERT", "CONVERT", "convert"], "Output the input sequences in a different file format."), ##################### PARAMETERS (set things) ######################### @@ -88,184 +76,126 @@ def __init__(self, cmd="clustalw", **kwargs): _Switch(["-quicktree", "-QUICKTREE", "QUICKTREE", "quicktree"], "Use FAST algorithm for the alignment guide tree"), _Option(["-type", "-TYPE", "TYPE", "type"], - [], - lambda x: x in ["PROTEIN", "DNA", "protein", "dna"], - False, "PROTEIN or DNA sequences", - True), + checker_function=lambda x: x in ["PROTEIN", "DNA", + "protein", "dna"]), _Switch(["-negative", "-NEGATIVE", "NEGATIVE", "negative"], "Protein alignment with negative values in matrix"), _Option(["-outfile", "-OUTFILE", "OUTFILE", "outfile"], - ["file"], - None, - False, "Output sequence alignment file name", - True), + types=["file"]), _Option(["-output", "-OUTPUT", "OUTPUT", "output"], - [], - lambda x: x in ["GCG", "GDE", "PHYLIP", "PIR", "NEXUS", - "gcg", "gde", "phylip", "pir", "nexus"], - False, "Output format: GCG, GDE, PHYLIP, PIR or NEXUS", - True), + checker_function=lambda x: x in ["GCG", "GDE", "PHYLIP", + "PIR", "NEXUS", + "gcg", "gde", "phylip", + "pir", "nexus"]), _Option(["-outorder", "-OUTORDER", "OUTORDER", "outorder"], - [], - lambda x: x in ["INPUT", "input", "ALIGNED", "aligned"], - False, "Output taxon order: INPUT or ALIGNED", - True), + checker_function=lambda x: x in ["INPUT", "input", + "ALIGNED", "aligned"]), _Option(["-case", "-CASE", "CASE", "case"], - [], - lambda x: x in ["UPPER", "upper", "LOWER", "lower"], - False, "LOWER or UPPER (for GDE output only)", - True), + checker_function=lambda x: x in ["UPPER", "upper", + "LOWER", "lower"]), _Option(["-seqnos", "-SEQNOS", "SEQNOS", "seqnos"], - [], - lambda x: x in ["ON", "on", "OFF", "off"], - False, "OFF or ON (for Clustal output only)", - True), + checker_function=lambda x: x 
in ["ON", "on", + "OFF", "off"]), _Option(["-seqno_range", "-SEQNO_RANGE", "SEQNO_RANGE", "seqno_range"], - [], - lambda x: x in ["ON", "on", "OFF", "off"], - False, "OFF or ON (NEW- for all output formats)", - True), + checker_function=lambda x: x in ["ON", "on", + "OFF", "off"]), _Option(["-range", "-RANGE", "RANGE", "range"], - [], - None, - False, "Sequence range to write starting m to m+n. " - "Input as string eg. '24,200'", - True), + "Input as string eg. '24,200'"), _Option(["-maxseqlen", "-MAXSEQLEN", "MAXSEQLEN", "maxseqlen"], - [], - lambda x: isinstance(x, int), - False, "Maximum allowed input sequence length", - True), + checker_function=lambda x: isinstance(x, int)), _Switch(["-quiet", "-QUIET", "QUIET", "quiet"], "Reduce console output to minimum"), _Switch(["-stats", "-STATS", "STATS", "stats"], "Log some alignents statistics to file"), # ***Fast Pairwise Alignments:*** _Option(["-ktuple", "-KTUPLE", "KTUPLE", "ktuple"], - [], - lambda x: isinstance(x, int) or \ - isinstance(x, float), - False, "Word size", - True), + checker_function=lambda x: isinstance(x, int) or \ + isinstance(x, float)), _Option(["-topdiags", "-TOPDIAGS", "TOPDIAGS", "topdiags"], - [], - lambda x: isinstance(x, int) or \ - isinstance(x, float), - False, "Number of best diags.", - True), + checker_function=lambda x: isinstance(x, int) or \ + isinstance(x, float)), _Option(["-window", "-WINDOW", "WINDOW", "window"], - [], - lambda x: isinstance(x, int) or \ - isinstance(x, float), - False, "Window around best diags.", - True), + checker_function=lambda x: isinstance(x, int) or \ + isinstance(x, float)), _Option(["-pairgap", "-PAIRGAP", "PAIRGAP", "pairgap"], - [], - lambda x: isinstance(x, int) or \ - isinstance(x, float), - False, "Gap penalty", - True), + checker_function=lambda x: isinstance(x, int) or \ + isinstance(x, float)), _Option(["-score", "-SCORE", "SCORE", "score"], - [], - lambda x: x in ["percent", "PERCENT", "absolute", - "ABSOLUTE"], - False, "Either: PERCENT or 
ABSOLUTE", - True), + checker_function=lambda x: x in ["percent", "PERCENT", + "absolute","ABSOLUTE"]), # ***Slow Pairwise Alignments:*** _Option(["-pwmatrix", "-PWMATRIX", "PWMATRIX", "pwmatrix"], - [], - lambda x: x in ["BLOSUM", "PAM", "GONNET", "ID", \ - "blosum", "pam", "gonnet", "id"] or \ - os.path.exists(x), - False, "Protein weight matrix=BLOSUM, PAM, GONNET, ID or filename", - True), + checker_function=lambda x: x in ["BLOSUM", "PAM", + "GONNET", "ID", + "blosum", "pam", + "gonnet", "id"] or \ + os.path.exists(x), + types=["file"]), _Option(["-pwdnamatrix", "-PWDNAMATRIX", "PWDNAMATRIX", "pwdnamatrix"], - [], - lambda x: x in ["IUB", "CLUSTALW", "iub", "clustalw"] or \ - os.path.exists(x), - False, "DNA weight matrix=IUB, CLUSTALW or filename", - True), + checker_function=lambda x: x in ["IUB", "CLUSTALW", + "iub", "clustalw"] or \ + os.path.exists(x), + types=["file"]), _Option(["-pwgapopen", "-PWGAPOPEN", "PWGAPOPEN", "pwgapopen"], - [], - lambda x: isinstance(x, int) or \ - isinstance(x, float), - False, "Gap opening penalty", - True), + checker_function=lambda x: isinstance(x, int) or \ + isinstance(x, float)), _Option(["-pwgapext", "-PWGAPEXT", "PWGAPEXT", "pwgapext"], - [], - lambda x: isinstance(x, int) or \ - isinstance(x, float), - False, "Gap opening penalty", - True), + checker_function=lambda x: isinstance(x, int) or \ + isinstance(x, float)), # ***Multiple Alignments:*** _Option(["-newtree", "-NEWTREE", "NEWTREE", "newtree"], - ["file"], - None, - False, "Output file name for newly created guide tree", - True), + types=["file"]), _Option(["-usetree", "-USETREE", "USETREE", "usetree"], - ["file"], - lambda x: os.path.exists, - False, "File name of guide tree", - True), + checker_function=lambda x: os.path.exists, + types=["file"]), _Option(["-matrix", "-MATRIX", "MATRIX", "matrix"], - [], - lambda x: x in ["BLOSUM", "PAM", "GONNET", "ID", \ - "blosum", "pam", "gonnet", "id"] or \ - os.path.exists(x), - False, "Protein weight matrix=BLOSUM, 
PAM, GONNET, ID or filename", - True), + checker_function=lambda x: x in ["BLOSUM", "PAM", + "GONNET", "ID", + "blosum", "pam", + "gonnet", "id"] or \ + os.path.exists(x), + types=["file"]), _Option(["-dnamatrix", "-DNAMATRIX", "DNAMATRIX", "dnamatrix"], - [], - lambda x: x in ["IUB", "CLUSTALW", "iub", "clustalw"] or \ - os.path.exists(x), - False, "DNA weight matrix=IUB, CLUSTALW or filename", - True), + checker_function=lambda x: x in ["IUB", "CLUSTALW", + "iub", "clustalw"] or \ + os.path.exists(x), + types=["file"]), _Option(["-gapopen", "-GAPOPEN", "GAPOPEN", "gapopen"], - [], - lambda x: isinstance(x, int) or \ - isinstance(x, float), - False, "Gap opening penalty", - True), + checker_function=lambda x: isinstance(x, int) or \ + isinstance(x, float)), _Option(["-gapext", "-GAPEXT", "GAPEXT", "gapext"], - [], - lambda x: isinstance(x, int) or \ - isinstance(x, float), - False, "Gap extension penalty", - True), + checker_function=lambda x: isinstance(x, int) or \ + isinstance(x, float)), _Switch(["-endgaps", "-ENDGAPS", "ENDGAPS", "endgaps"], "No end gap separation pen."), _Option(["-gapdist", "-GAPDIST", "GAPDIST", "gapdist"], - [], - lambda x: isinstance(x, int) or \ - isinstance(x, float), - False, "Gap separation pen. range", - False), + checker_function=lambda x: isinstance(x, int) or \ + isinstance(x, float)), _Switch(["-nopgap", "-NOPGAP", "NOPGAP", "nopgap"], "Residue-specific gaps off"), _Switch(["-nohgap", "-NOHGAP", "NOHGAP", "nohgap"], @@ -273,61 +203,41 @@ def __init__(self, cmd="clustalw", **kwargs): _Switch(["-hgapresidues", "-HGAPRESIDUES", "HGAPRESIDUES", "hgapresidues"], "List hydrophilic res."), _Option(["-maxdiv", "-MAXDIV", "MAXDIV", "maxdiv"], - [], - lambda x: isinstance(x, int) or \ - isinstance(x, float), - False, "% ident. 
for delay", - True), + checker_function=lambda x: isinstance(x, int) or \ + isinstance(x, float)), _Option(["-transweight", "-TRANSWEIGHT", "TRANSWEIGHT", "transweight"], - [], - lambda x: isinstance(x, int) or \ - isinstance(x, float), - False, "Transitions weighting", - True), + checker_function=lambda x: isinstance(x, int) or \ + isinstance(x, float)), _Option(["-iteration", "-ITERATION", "ITERATION", "iteration"], - [], - lambda x: x in ["NONE", "TREE", "ALIGNMENT", - "none", "tree", "alignment"], - False, "NONE or TREE or ALIGNMENT", - True), + checker_function=lambda x: x in ["NONE", "TREE", + "ALIGNMENT", + "none", "tree", + "alignment"]), _Option(["-numiter", "-NUMITER", "NUMITER", "numiter"], - [], - lambda x: isinstance(x, int), - False, "maximum number of iterations to perform", - False), + checker_function=lambda x: isinstance(x, int)), _Switch(["-noweights", "-NOWEIGHTS", "NOWEIGHTS", "noweights"], "Disable sequence weighting"), # ***Profile Alignments:*** _Switch(["-profile", "-PROFILE", "PROFILE", "profile"], "Merge two alignments by profile alignment"), _Option(["-newtree1", "-NEWTREE1", "NEWTREE1", "newtree1"], - ["file"], - None, - False, "Output file name for new guide tree of profile1", - True), + types=["file"]), _Option(["-newtree2", "-NEWTREE2", "NEWTREE2", "newtree2"], - ["file"], - None, - False, "Output file for new guide tree of profile2", - True), + types=["file"]), _Option(["-usetree1", "-USETREE1", "USETREE1", "usetree1"], - ["file"], - lambda x: os.path.exists, - False, "File name of guide tree for profile1", - True), + checker_function=lambda x: os.path.exists, + types=["file"]), _Option(["-usetree2", "-USETREE2", "USETREE2", "usetree2"], - ["file"], - lambda x: os.path.exists, - False, "File name of guide tree for profile2", - True), + checker_function=lambda x: os.path.exists, + types=["file"]), # ***Sequence to Profile Alignments:*** _Switch(["-sequences", "-SEQUENCES", "SEQUENCES", "sequences"], "Sequentially add profile2 
sequences to profile1 alignment"), @@ -337,94 +247,60 @@ def __init__(self, cmd="clustalw", **kwargs): "Do not use secondary structure-gap penalty mask for profile 2"), # ***Structure Alignments:*** _Option(["-secstrout", "-SECSTROUT", "SECSTROUT", "secstrout"], - [], - lambda x: x in ["STRUCTURE", "MASK", "BOTH", "NONE", - "structure", "mask", "both", "none"], - False, "STRUCTURE or MASK or BOTH or NONE output in alignment file", - True), + checker_function=lambda x: x in ["STRUCTURE", "MASK", + "BOTH", "NONE", + "structure", "mask", + "both", "none"]), _Option(["-helixgap", "-HELIXGAP", "HELIXGAP", "helixgap"], - [], - lambda x: isinstance(x, int) or \ - isinstance(x, float), - False, "Gap penalty for helix core residues", - True), + checker_function=lambda x: isinstance(x, int) or \ + isinstance(x, float)), _Option(["-strandgap", "-STRANDGAP", "STRANDGAP", "strandgap"], - [], - lambda x: isinstance(x, int) or \ - isinstance(x, float), - False, "gap penalty for strand core residues", - True), + checker_function=lambda x: isinstance(x, int) or \ + isinstance(x, float)), _Option(["-loopgap", "-LOOPGAP", "LOOPGAP", "loopgap"], - [], - lambda x: isinstance(x, int) or \ - isinstance(x, float), - False, "Gap penalty for loop regions", - True), + checker_function=lambda x: isinstance(x, int) or \ + isinstance(x, float)), _Option(["-terminalgap", "-TERMINALGAP", "TERMINALGAP", "terminalgap"], - [], - lambda x: isinstance(x, int) or \ - isinstance(x, float), - False, "Gap penalty for structure termini", - True), + checker_function=lambda x: isinstance(x, int) or \ + isinstance(x, float)), _Option(["-helixendin", "-HELIXENDIN", "HELIXENDIN", "helixendin"], - [], - lambda x: isinstance(x, int), - False, "Number of residues inside helix to be treated as terminal", - True), + checker_function=lambda x: isinstance(x, int)), _Option(["-helixendout", "-HELIXENDOUT", "HELIXENDOUT", "helixendout"], - [], - lambda x: isinstance(x, int), - False, "Number of residues outside helix to 
be treated as terminal", - True), + checker_function=lambda x: isinstance(x, int)), _Option(["-strandendin", "-STRANDENDIN", "STRANDENDIN", "strandendin"], - [], - lambda x: isinstance(x, int), - False, "Number of residues inside strand to be treated as terminal", - True), + checker_function=lambda x: isinstance(x, int)), _Option(["-strandendout", "-STRANDENDOUT", "STRANDENDOUT", "strandendout"], - [], - lambda x: isinstance(x, int), - False, "number of residues outside strand to be treated as terminal", - True), + checker_function=lambda x: isinstance(x, int)), # ***Trees:*** _Option(["-outputtree", "-OUTPUTTREE", "OUTPUTTREE", "outputtree"], - [], - lambda x: x in ["NJ", "PHYLIP", "DIST", "NEXUS", - "nj", "phylip", "dist", "nexus"], - False, "nj OR phylip OR dist OR nexus", - True), + checker_function=lambda x: x in ["NJ", "PHYLIP", + "DIST", "NEXUS", + "nj", "phylip", + "dist", "nexus"]), _Option(["-seed", "-SEED", "SEED", "seed"], - [], - lambda x: isinstance(x, int), - False, "Seed number for bootstraps.", - True), + checker_function=lambda x: isinstance(x, int)), _Switch(["-kimura", "-KIMURA", "KIMURA", "kimura"], "Use Kimura's correction."), _Switch(["-tossgaps", "-TOSSGAPS", "TOSSGAPS", "tossgaps"], "Ignore positions with gaps."), _Option(["-bootlabels", "-BOOTLABELS", "BOOTLABELS", "bootlabels"], - [], - lambda x: x in ["NODE", "BRANCH", "node", "branch"], - False, "Node OR branch position of bootstrap values in tree display", - True), + checker_function=lambda x: x in ["NODE", "BRANCH", + "node", "branch"]), _Option(["-clustering", "-CLUSTERING", "CLUSTERING", "clustering"], - [], - lambda x: x in ["NJ", "UPGMA", "nj", "upgma"], - False, "NJ or UPGMA", - True) + checker_function=lambda x: x in ["NJ", "UPGMA", "nj", "upgma"]) ] AbstractCommandline.__init__(self, cmd, **kwargs) diff --git a/Bio/Align/Applications/_Mafft.py b/Bio/Align/Applications/_Mafft.py index ded385805bb..0230fa55b9c 100644 --- a/Bio/Align/Applications/_Mafft.py +++ 
b/Bio/Align/Applications/_Mafft.py @@ -88,24 +88,24 @@ def __init__(self, cmd="mafft", **kwargs): "Automatically select strategy. Default off."), #Distance is calculated based on the number of shared 6mers. Default: on _Switch(["--6merpair", "6merpair", "sixmerpair"], - "Distance is calculated based on the number of shared " - "6mers. Default: on"), + "Distance is calculated based on the number of shared " + "6mers. Default: on"), #All pairwise alignments are computed with the Needleman-Wunsch #algorithm. More accurate but slower than --6merpair. Suitable for a #set of globally alignable sequences. Applicable to up to ~200 #sequences. A combination with --maxiterate 1000 is recommended (G- #INS-i). Default: off (6mer distance is used) _Switch(["--globalpair", "globalpair"], - "All pairwise alignments are computed with the " - "Needleman-Wunsch algorithm. Default: off"), + "All pairwise alignments are computed with the " + "Needleman-Wunsch algorithm. Default: off"), #All pairwise alignments are computed with the Smith-Waterman #algorithm. More accurate but slower than --6merpair. Suitable for a #set of locally alignable sequences. Applicable to up to ~200 #sequences. A combination with --maxiterate 1000 is recommended (L- #INS-i). Default: off (6mer distance is used) _Switch(["--localpair", "localpair"], - "All pairwise alignments are computed with the " - "Smith-Waterman algorithm. Default: off"), + "All pairwise alignments are computed with the " + "Smith-Waterman algorithm. Default: off"), #All pairwise alignments are computed with a local algorithm with #the generalized affine gap cost (Altschul 1998). More accurate but #slower than --6merpair. Suitable when large internal gaps are @@ -113,35 +113,35 @@ def __init__(self, cmd="mafft", **kwargs): #maxiterate 1000 is recommended (E-INS-i). 
Default: off (6mer #distance is used) _Switch(["--genafpair", "genafpair"], - "All pairwise alignments are computed with a local " - "algorithm with the generalized affine gap cost " - "(Altschul 1998). Default: off"), + "All pairwise alignments are computed with a local " + "algorithm with the generalized affine gap cost " + "(Altschul 1998). Default: off"), #All pairwise alignments are computed with FASTA (Pearson and Lipman #1988). FASTA is required. Default: off (6mer distance is used) _Switch(["--fastapair", "fastapair"], - "All pairwise alignments are computed with FASTA " - "(Pearson and Lipman 1988). Default: off"), + "All pairwise alignments are computed with FASTA " + "(Pearson and Lipman 1988). Default: off"), #Weighting factor for the consistency term calculated from pairwise #alignments. Valid when either of --blobalpair, --localpair, -- #genafpair, --fastapair or --blastpair is selected. Default: 2.7 - _Option(["--weighti", "weighti"], [], - lambda x: isinstance(x, float), 0, - "Weighting factor for the consistency term calculated " - "from pairwise alignments. Default: 2.7", - 0), + _Option(["--weighti", "weighti"], + "Weighting factor for the consistency term calculated " + "from pairwise alignments. Default: 2.7", + checker_function=lambda x: isinstance(x, float), + equate=False), #Guide tree is built number times in the progressive stage. Valid #with 6mer distance. Default: 2 - _Option(["--retree", "retree"], [], - lambda x: isinstance(x, int), 0, - "Guide tree is built number times in the progressive " - "stage. Valid with 6mer distance. Default: 2", - 0), + _Option(["--retree", "retree"], + "Guide tree is built number times in the progressive " + "stage. Valid with 6mer distance. Default: 2", + checker_function=lambda x: isinstance(x, int), + equate=False), #Number cycles of iterative refinement are performed. 
Default: 0 - _Option(["--maxiterate", "maxiterate"], [], - lambda x: isinstance(x, int), 0, - "Number cycles of iterative refinement are performed. " - "Default: 0", - 0), + _Option(["--maxiterate", "maxiterate"], + "Number cycles of iterative refinement are performed. " + "Default: 0", + checker_function=lambda x: isinstance(x, int), + equate=False), #Use FFT approximation in group-to-group alignment. Default: on _Switch(["--fft", "fft"], "Use FFT approximation in group-to-group alignment. " @@ -149,8 +149,8 @@ def __init__(self, cmd="mafft", **kwargs): #Do not use FFT approximation in group-to-group alignment. Default: #off _Switch(["--nofft", "nofft"], - "Do not use FFT approximation in group-to-group " - "alignment. Default: off"), + "Do not use FFT approximation in group-to-group " + "alignment. Default: off"), #Alignment score is not checked in the iterative refinement stage. #Default: off (score is checked) _Switch(["--noscore", "noscore"], @@ -182,11 +182,11 @@ def __init__(self, cmd="mafft", **kwargs): "The PartTree algorithm is used with distances based " "on FASTA. Default: off"), #The number of partitions in the PartTree algorithm. Default: 50 - _Option(["--partsize", "partsize"], [], - lambda x: isinstance(x, int), 0, + _Option(["--partsize", "partsize"], "The number of partitions in the PartTree algorithm. " "Default: 50", - 0), + checker_function=lambda x: isinstance(x, int), + equate=False), #Do not make alignment larger than number sequences. Valid only with #the --*parttree options. Default: the number of input sequences _Switch(["--groupsize", "groupsize"], @@ -194,82 +194,83 @@ def __init__(self, cmd="mafft", **kwargs): "Default: the number of input sequences"), #**** Parameter **** #Gap opening penalty at group-to-group alignment. Default: 1.53 - _Option(["--op", "op"], [], - lambda x: isinstance(x, float), 0, + _Option(["--op", "op"], "Gap opening penalty at group-to-group alignment. 
" "Default: 1.53", - 0), + checker_function=lambda x: isinstance(x, float), + equate=False), #Offset value, which works like gap extension penalty, for group-to- #group alignment. Deafult: 0.123 - _Option(["--ep", "ep"], [], - lambda x: isinstance(x, float), 0, + _Option(["--ep", "ep"], "Offset value, which works like gap extension penalty, " "for group-to- group alignment. Default: 0.123", - 0), + checker_function=lambda x: isinstance(x, float), + equate=False), #Gap opening penalty at local pairwise alignment. Valid when the -- #localpair or --genafpair option is selected. Default: -2.00 - _Option(["--lop", "lop"], [], - lambda x: isinstance(x, float), 0, + _Option(["--lop", "lop"], "Gap opening penalty at local pairwise alignment. " "Default: 0.123", - 0), + checker_function=lambda x: isinstance(x, float), + equate=False), #Offset value at local pairwise alignment. Valid when the -- #localpair or --genafpair option is selected. Default: 0.1 - _Option(["--lep", "lep"], [], - lambda x: isinstance(x, float), 0, + _Option(["--lep", "lep"], "Offset value at local pairwise alignment. " "Default: 0.1", - 0), + checker_function=lambda x: isinstance(x, float), + equate=False), #Gap extension penalty at local pairwise alignment. Valid when the - #-localpair or --genafpair option is selected. Default: -0.1 - _Option(["--lexp", "lexp"], [], - lambda x: isinstance(x, float), 0, + _Option(["--lexp", "lexp"], "Gap extension penalty at local pairwise alignment. " "Default: -0.1", - 0), + checker_function=lambda x: isinstance(x, float), + equate=False), #Gap opening penalty to skip the alignment. Valid when the -- #genafpair option is selected. Default: -6.00 - _Option(["--LOP", "LOP"], [], - lambda x: isinstance(x, float), 0, + _Option(["--LOP", "LOP"], "Gap opening penalty to skip the alignment. " "Default: -6.00", - 0), + checker_function=lambda x: isinstance(x, float), + equate=False), #Gap extension penalty to skip the alignment. 
Valid when the -- #genafpair option is selected. Default: 0.00 - _Option(["--LEXP", "LEXP"], [], - lambda x: isinstance(x, float), - 0, + _Option(["--LEXP", "LEXP"], "Gap extension penalty to skip the alignment. " "Default: 0.00", - 0), + checker_function=lambda x: isinstance(x, float), + equate=False), #BLOSUM number matrix (Henikoff and Henikoff 1992) is used. #number=30, 45, 62 or 80. Default: 62 - _Option(["--bl", "bl"], [], - lambda x: x in BLOSUM_MATRICES, 0, + _Option(["--bl", "bl"], "BLOSUM number matrix is used. Default: 62", - 0), + checker_function=lambda x: x in BLOSUM_MATRICES, + equate=False), #JTT PAM number (Jones et al. 1992) matrix is used. number>0. #Default: BLOSUM62 - _Option(["--jtt", "jtt"], [], None, 0, + _Option(["--jtt", "jtt"], "JTT PAM number (Jones et al. 1992) matrix is used. " "number>0. Default: BLOSUM62", - 0), + equate=False), #Transmembrane PAM number (Jones et al. 1994) matrix is used. #number>0. Default: BLOSUM62 - _Option(["--tm", "tm"], [], - os.path.exists, 0, + _Option(["--tm", "tm"], "Transmembrane PAM number (Jones et al. 1994) " "matrix is used. number>0. Default: BLOSUM62", - 0), + checker_function=os.path.exists, + types=["file"], + equate=False), #Use a user-defined AA scoring matrix. The format of matrixfile is #the same to that of BLAST. Ignored when nucleotide sequences are #input. Default: BLOSUM62 - _Option(["--aamatrix", "aamatrix"], [], - os.path.exists, 0, + _Option(["--aamatrix", "aamatrix"], "Use a user-defined AA scoring matrix. " "Default: BLOSUM62", - 0), + checker_function=os.path.exists, + types=["file"], + equate=False), #Incorporate the AA/nuc composition information into the scoring #matrix. Default: off _Switch(["--fmodel", "fmodel"], @@ -309,10 +310,12 @@ def __init__(self, cmd="mafft", **kwargs): # form: "mafft --seed align1 --seed align2 [etc] input" # Effectively for n number of seed alignments. 
Here we're going to # assume 6 extra are enough - _Option(["--seed", "seed"], ["file"], os.path.exists, 0, + _Option(["--seed", "seed"], "Seed alignments given in alignment_n (fasta format) " "are aligned with sequences in input.", - 0), + checker_function=os.path.exists, + types=["file"], + equate=False), #The old solution of also defining extra parameters with #["--seed", "seed1"] etc worked, but clashes with the recent #code in the base class to look for duplicate paramters and raise @@ -322,13 +325,18 @@ def __init__(self, cmd="mafft", **kwargs): #assigned to the value? ####################### END SEEDS ################################ #The input (must be FASTA format) - _Argument(["input"], ["file"], os.path.exists, 1, - "Input file name"), + _Argument(["input"], + "Input file name", + checker_function=os.path.exists, + types=["file"], + is_required=True), ################################################################### #mafft-profile takes a second alignment input as an argument: #mafft-profile align1 align2 - _Argument(["input1"], ["file"], os.path.exists, 0, - "Second input file name for the mafft-profile command") + _Argument(["input1"], + "Second input file name for the mafft-profile command", + checker_function=os.path.exists, + types=["file"]), ] AbstractCommandline.__init__(self, cmd, **kwargs) diff --git a/Bio/Align/Applications/_Muscle.py b/Bio/Align/Applications/_Muscle.py index 9e433dd3175..0ac2eae7ad2 100644 --- a/Bio/Align/Applications/_Muscle.py +++ b/Bio/Align/Applications/_Muscle.py @@ -49,132 +49,124 @@ def __init__(self, cmd="muscle", **kwargs): self.parameters = \ [ #Can't use "in" as the final alias as this is a reserved word in python: - _Option(["-in", "in", "input"], ["file"], - None, 0, "Input filename", - 0), #No equate - _Option(["-out", "out"], ["file"], - None, 0, "Output filename", - 0), #No equate + _Option(["-in", "in", "input"], + "Input filename", + types=["file"], + equate=False), + _Option(["-out", "out"], + "Output 
filename", + types=["file"], + equate=False), _Switch(["-diags", "diags"], "Find diagonals (faster for similar sequences)"), _Switch(["-profile", "profile"], "Perform a profile alignment"), - _Option(["-in1", "in1"], ["file"], - None, 0, + _Option(["-in1", "in1"], "First input filename for profile alignment", - 0), - _Option(["-in2", "in2"], ["file"], - None, 0, + types=["file"], + equate=False), + _Option(["-in2", "in2"], "Second input filename for a profile alignment", - 0), + types=["file"], + equate=False), #anchorspacing Integer 32 Minimum spacing between - _Option(["-anchorspacing", "anchorspacing"], [], - lambda x: isinstance(x, int), - 0, + _Option(["-anchorspacing", "anchorspacing"], "Minimum spacing between anchor columns", - 0), + checker_function=lambda x: isinstance(x, int), + equate=False), #center Floating point [1] Center parameter. # Should be negative. - _Option(["-center", "center"], [], - lambda x: isinstance(x, float), - 0, + _Option(["-center", "center"], "Center parameter - should be negative", - 0), + checker_function=lambda x: isinstance(x, float), + equate=False), #cluster1 upgma upgmb Clustering method. - _Option(["-cluster1", "cluster1"], [], - lambda x: x in CLUSTERING_ALGORITHMS, 0, + _Option(["-cluster1", "cluster1"], "Clustering method used in iteration 1", - 0), + checker_function=lambda x: x in CLUSTERING_ALGORITHMS, + equate=False), #cluster2 upgmb cluster1 is used in # neighborjoining iteration 1 and 2, # cluster2 in later # iterations. - _Option(["-cluster2", "cluster2"], [], - lambda x: x in CLUSTERING_ALGORITHMS, 0, + _Option(["-cluster2", "cluster2"], "Clustering method used in iteration 2", - 0), + checker_function=lambda x: x in CLUSTERING_ALGORITHMS, + equate=False), #diaglength Integer 24 Minimum length of # diagonal. 
- _Option(["-diaglength", "diaglength"], [], - lambda x: isinstance(x, int), - 0, + _Option(["-diaglength", "diaglength"], "Minimum length of diagonal", - 0), + checker_function=lambda x: isinstance(x, int), + equate=True), #diagmargin Integer 5 Discard this many # positions at ends of # diagonal. - _Option(["-diagmargin", "diagmargin"], [], - lambda x: isinstance(x, int), - 0, + _Option(["-diagmargin", "diagmargin"], "Discard this many positions at ends of diagonal", - 0), + checker_function=lambda x: isinstance(x, int), + equate=False), #distance1 kmer6_6 Kmer6_6 (amino) or Distance measure for # kmer20_3 Kmer4_6 (nucleo) iteration 1. # kmer20_4 # kbit20_3 # kmer4_6 - _Option(["-distance1", "distance1"], [], - lambda x: x in DISTANCE_MEASURES_ITER1, - 0, + _Option(["-distance1", "distance1"], "Distance measure for iteration 1", - 0), + checker_function=lambda x: x in DISTANCE_MEASURES_ITER1, + equate=False), #distance2 kmer6_6 pctid_kimura Distance measure for # kmer20_3 iterations 2, 3 ... # kmer20_4 # kbit20_3 # pctid_kimura # pctid_log - _Option(["-distance2", "distance2"], [], - lambda x: x in DISTANCE_MEASURES_ITER2, - 0, + _Option(["-distance2", "distance2"], "Distance measure for iteration 2", - 0), + checker_function=lambda x: x in DISTANCE_MEASURES_ITER2, + equate=False), #gapopen Floating point [1] The gap open score. # Must be negative. - _Option(["-gapopen", "gapopen"], [], - lambda x: isinstance(x, float), - 0, + _Option(["-gapopen", "gapopen"], "Gap open score - negative number", - 0), + checker_function=lambda x: isinstance(x, float), + equate=False), #hydro Integer 5 Window size for # determining whether a # region is hydrophobic. - _Option(["-hydro", "hydro"], [], - lambda x: isinstance(x, int), - 0, + _Option(["-hydro", "hydro"], "Window size for hydrophobic region", - 0), + checker_function=lambda x: isinstance(x, int), + equate=False), #hydrofactor Floating point 1.2 Multiplier for gap # open/close penalties in # hydrophobic regions. 
- _Option(["-hydrofactor", "hydrofactor"], [], - lambda x: isinstance(x, float), - 0, + _Option(["-hydrofactor", "hydrofactor"], "Multiplier for gap penalties in hydrophobic regions", - 0), + checker_function=lambda x: isinstance(x, float), + equate=False), #log File name None. Log file name (delete # existing file). - _Option(["-log", "log"], ["file"], - None, 0, + _Option(["-log", "log"], "Log file name", - 0), + types=["file"], + equate=False), #loga File name None. Log file name (append # to existing file). - _Option(["-loga", "loga"], ["file"], - None, 0, + _Option(["-loga", "loga"], "Log file name (append to existing file)", - 0), + types=["file"], + equate=False), #maxdiagbreak Integer 1 Maximum distance # between two diagonals # that allows them to # merge into one # diagonal. - _Option(["-maxdiagbreak", "maxdiagbreak"], [], - lambda x: isinstance(x, int), - 0, + _Option(["-maxdiagbreak", "maxdiagbreak"], "Maximum distance between two diagonals that allows " "them to merge into one diagonal", - 0), + checker_function=lambda x: isinstance(x, int), + equate=False), #maxhours Floating point None. Maximum time to run in # hours. The actual time # may exceed the @@ -183,43 +175,38 @@ def __init__(self, cmd="muscle", **kwargs): # are allowed, so 1.5 # means one hour and 30 # minutes. - _Option(["-maxhours", "maxhours"], [], - lambda x: isinstance(x, float), - 0, + _Option(["-maxhours", "maxhours"], "Maximum time to run in hours", - 0), + checker_function=lambda x: isinstance(x, float), + equate=False), #maxiters Integer 1, 2 ... 16 Maximum number of # iterations. - _Option(["-maxiters", "maxiters"], [], - lambda x: isinstance(x, int), - 0, + _Option(["-maxiters", "maxiters"], "Maximum number of iterations", - 0), + checker_function=lambda x: isinstance(x, int), + equate=False), #maxtrees Integer 1 Maximum number of new # trees to build in # iteration 2. 
- _Option(["-maxtrees", "maxtrees"], [], - lambda x: isinstance(x, int), - 0, + _Option(["-maxtrees", "maxtrees"], "Maximum number of trees to build in iteration 2", - 0), + checker_function=lambda x: isinstance(x, int), + equate=False), #minbestcolscore Floating point [1] Minimum score a column # must have to be an # anchor. - _Option(["-minbestcolscore", "minbestcolscore"], [], - lambda x: isinstance(x, float), - 0, + _Option(["-minbestcolscore", "minbestcolscore"], "Minimum score a column must have to be an anchor", - 0), + checker_function=lambda x: isinstance(x, float), + equate=False), #minsmoothscore Floating point [1] Minimum smoothed score # a column must have to # be an anchor. - _Option(["-minsmoothscore", "minsmoothscore"], [], - lambda x: isinstance(x, float), - 0, + _Option(["-minsmoothscore", "minsmoothscore"], "Minimum smoothed score a column must have to " "be an anchor", - 0), + checker_function=lambda x: isinstance(x, float), + equate=False), #objscore sp spm Objective score used by # ps tree dependent # dp refinement. @@ -233,49 +220,43 @@ def __init__(self, cmd="muscle", **kwargs): # ps=average profile- # sequence score. # xp=cross profile score. - _Option(["-objscore", "objscore"], [], - lambda x: x in OBJECTIVE_SCORES, - 0, + _Option(["-objscore", "objscore"], "Objective score used by tree dependent refinement", - 0), + checker_function=lambda x: x in OBJECTIVE_SCORES, + equate=False), #root1 pseudo psuedo Method used to root - _Option(["-root1", "root1"], [], - lambda x: x in TREE_ROOT_METHODS, - 0, + _Option(["-root1", "root1"], "Method used to root tree in iteration 1", - 0), + checker_function=lambda x: x in TREE_ROOT_METHODS, + equate=False), #root2 midlongestspan tree; root1 is used in # minavgleafdist iteration 1 and 2, # root2 in later # iterations. 
- _Option(["-root2", "root2"], [], - lambda x: x in TREE_ROOT_METHODS, - 0, + _Option(["-root2", "root2"], "Method used to root tree in iteration 2", - 0), + checker_function=lambda x: x in TREE_ROOT_METHODS, + equate=False), #seqtype protein auto Sequence type. # nucleo # auto - _Option(["-seqtype", "seqtype"], [], - lambda x: x in SEQUENCE_TYPES, - 0, + _Option(["-seqtype", "seqtype"], "Sequence type", - 0), + checker_function=lambda x: x in SEQUENCE_TYPES, + equate=False), #smoothscoreceil Floating point [1] Maximum value of column # score for smoothing # purposes. - _Option(["-smoothscoreceil", "smoothscoreceil"], [], - lambda x: isinstance(x, float), - 0, + _Option(["-smoothscoreceil", "smoothscoreceil"], "Maximum value of column score for smoothing", - 0), + checker_function=lambda x: isinstance(x, float), + equate=False), #smoothwindow Integer 7 Window used for anchor # column smoothing. - _Option(["-smoothwindow", "smoothwindow"], [], - lambda x: isinstance(x, int), - 0, + _Option(["-smoothwindow", "smoothwindow"], "Window used for anchor column smoothing", - 0), + checker_function=lambda x: isinstance(x, int), + equate=False), #SUEFF Floating point value 0.1 Constant used in UPGMB # between 0 and 1. clustering. Determines # the relative fraction @@ -283,30 +264,26 @@ def __init__(self, cmd="muscle", **kwargs): # (SUEFF) vs. nearest- # neighbor linkage (1 # SUEFF). - _Option(["-sueff", "sueff"], [], - lambda x: isinstance(x, float), - 0, + _Option(["-sueff", "sueff"], "Constant used in UPGMB clustering", - 0), + checker_function=lambda x: isinstance(x, float), + equate=False), #tree1 File name None Save tree produced in - _Option(["-tree1", "tree1"], [], - None, 0, + _Option(["-tree1", "tree1"], "Save Newick tree from iteration 1", - 0), + equate=False), #tree2 first or second # iteration to given file # in Newick (Phylip- # compatible) format. 
- _Option(["-tree2", "tree2"], [], - None, 0, + _Option(["-tree2", "tree2"], "Save Newick tree from iteration 2", - 0), + equate=False), #weight1 none clustalw Sequence weighting - _Option(["-weight1", "weight1"], [], - lambda x: x in WEIGHTING_SCHEMES, - 0, + _Option(["-weight1", "weight1"], "Weighting scheme used in iteration 1", - 0), + checker_function=lambda x: x in WEIGHTING_SCHEMES, + equate=False), #weight2 henikoff scheme. # henikoffpb weight1 is used in # gsc iterations 1 and 2. @@ -325,11 +302,10 @@ def __init__(self, cmd="muscle", **kwargs): # method. # threeway=Gotoh three- # way method. - _Option(["-weight2", "weight2"], [], - lambda x: x in WEIGHTING_SCHEMES, - 0, + _Option(["-weight2", "weight2"], "Weighting scheme used in iteration 2", - 0), + checker_function=lambda x: x in WEIGHTING_SCHEMES, + equate=False), #################### FORMATS ####################################### # Multiple formats can be specified on the command line # If -msf appears it will be used regardless of other formats @@ -371,36 +347,34 @@ def __init__(self, cmd="muscle", **kwargs): _Switch(["-phys", "phys"], "Write output in PHYLIP sequential format"), ################## Additional specified output files ######### - _Option(["-phyiout", "phyiout"], ["file"], - None, 0, + _Option(["-phyiout", "phyiout"], "Write PHYLIP interleaved output to specified filename", - 0), #No equate - _Option(["-physout", "physout"], ["file"], - None, 0, - "Write PHYLIP sequential format to specified filename", - 0), #No equate - _Option(["-htmlout", "htmlout"], ["file"], - None, 0, - "Write HTML output to specified filename", - 0), #No equate - _Option(["-clwout", "clwout"], ["file"], - None, 0, + types=["file"], + equate=False), + _Option(["-physout", "physout"],"Write PHYLIP sequential format to specified filename", + types=["file"], + equate=False), + _Option(["-htmlout", "htmlout"],"Write HTML output to specified filename", + types=["file"], + equate=False), + _Option(["-clwout", "clwout"], 
"Write CLUSTALW output (with MUSCLE header) to specified " "filename", - 0), #No equate - _Option(["-clwstrictout", "clwstrictout"], ["file"], - None, 0, + types=["file"], + equate=False), + _Option(["-clwstrictout", "clwstrictout"], "Write CLUSTALW output (with version 1.81 header) to " "specified filename", - 0), #No equate - _Option(["-msfout", "msfout"], ["file"], - None, 0, + types=["file"], + equate=False), + _Option(["-msfout", "msfout"], "Write MSF format output to specified filename", - 0), #No equate - _Option(["-fastaout", "fastaout"], ["file"], - None, 0, + types=["file"], + equate=False), + _Option(["-fastaout", "fastaout"], "Write FASTA format output to specified filename", - 0), #No equate + types=["file"], + equate=False), ############## END FORMATS ################################### #anchors yes Use anchor optimization in tree dependent # refinement iterations. diff --git a/Bio/Align/Applications/_Prank.py b/Bio/Align/Applications/_Prank.py index dbfc5bd76b2..e953c8b58ec 100644 --- a/Bio/Align/Applications/_Prank.py +++ b/Bio/Align/Applications/_Prank.py @@ -29,29 +29,28 @@ def __init__(self, cmd="prank", **kwargs): self.parameters = [ ################## input/output parameters: ################## #-d=sequence_file - _Option(["-d", "d"], ["file"], - None, 1, "Input filename"), + _Option(["-d", "d"], + "Input filename", + types=["file"], + is_required=True), #-t=tree_file [default: no tree, generate approximate NJ tree] - _Option(["-t", "t"], ["file"], - None, 0, "Input guide tree filename"), + _Option(["-t", "t"],"Input guide tree filename", + types=["file"]), #-tree="tree_string" [tree in newick format; in double quotes] - _Option(["-tree", "tree"], [], - None, 0, + _Option(["-tree", "tree"], "Input guide tree as Newick string"), #-m=model_file [default: HKY2/WAG] - _Option(["-m", "m"], [], - None, 0, + _Option(["-m", "m"], "User-defined alignment model filename. 
Default: " "HKY2/WAG"), #-o=output_file [default: 'output'] - _Option(["-o", "o"], ["file"], - None, 0, + _Option(["-o", "o"], "Output filenames prefix. Default: 'output'\n " "Will write: output.?.fas (depending on requested " - "format), output.?.xml and output.?.dnd"), + "format), output.?.xml and output.?.dnd", + types=["file"]), #-f=output_format [default: 8] - _Option(["-f", "f"], [], - lambda x: x in OUTPUT_FORMAT_VALUES, 0, + _Option(["-f", "f"], "Output alignment format. Default: 8 FASTA\n" "Option are:\n" "1. IG/Stanford 8. Pearson/Fasta\n" @@ -59,7 +58,8 @@ def __init__(self, cmd="prank", **kwargs): "3. NBRF 12. Phylip\n" "4. EMBL 14. PIR/CODATA\n" "6. DNAStrider 15. MSF\n" - "7. Fitch 17. PAUP/NEXUS"), + "7. Fitch 17. PAUP/NEXUS", + checker_function=lambda x: x in OUTPUT_FORMAT_VALUES), _Switch(["-noxml", "noxml"], "Do not output XML files"), _Switch(["-notree", "notree"], @@ -77,37 +77,30 @@ def __init__(self, cmd="prank", **kwargs): _Switch(["-dots", "dots"], "Show insertion gaps as dots"), #-gaprate=# [gap opening rate; default: dna 0.025 / prot 0.0025] - _Option(["-gaprate", "gaprate"], [], - lambda x: isinstance(x, float), - 0, - "Gap opening rate. Default: dna 0.025 prot 0.0025"), + _Option(["-gaprate", "gaprate"], + "Gap opening rate. Default: dna 0.025 prot 0.0025", + checker_function=lambda x: isinstance(x, float)), #-gapext=# [gap extension probability; default: dna 0.5 / prot 0.5] - _Option(["-gapext", "gapext"], [], - lambda x: isinstance(x, float), - 0, + _Option(["-gapext", "gapext"], "Gap extension probability. Default: dna 0.5 " - "/ prot 0.5"), + "/ prot 0.5", + checker_function=lambda x: isinstance(x, float)), #-dnafreqs=#,#,#,# [ACGT; default: empirical] - _Option(["-dnafreqs", "dnafreqs"], [], - lambda x: isinstance(x, bytes), - 0, + _Option(["-dnafreqs", "dnafreqs"], "DNA frequencies - 'A,C,G,T'. eg '25,25,25,25' as a quote " - "surrounded string value. Default: empirical"), + "surrounded string value. 
Default: empirical", + checker_function=lambda x: isinstance(x, bytes)), #-kappa=# [ts/tv rate ratio; default:2] - _Option(["-kappa", "kappa"], [], - lambda x: isinstance(x, int), - 0, - "Transition/transversion ratio. Default: 2"), + _Option(["-kappa", "kappa"], + "Transition/transversion ratio. Default: 2", + checker_function=lambda x: isinstance(x, int)), #-rho=# [pur/pyr rate ratio; default:1] - _Option(["-rho", "rho"], [], - lambda x: isinstance(x, int), - 0, - "Purine/pyrimidine ratio. Default: 1"), + _Option(["-rho", "rho"], + "Purine/pyrimidine ratio. Default: 1", + checker_function=lambda x: isinstance(x, int)), #-codon [for DNA: use empirical codon model] #Assuming this is an input file as in -m - _Option(["-codon", "codon"], [], - None, - 0, + _Option(["-codon", "codon"], "Codon model filename. Default: empirical codon model"), #-termgap [penalise terminal gaps normally] _Switch(["-termgap", "termgap"], @@ -118,11 +111,10 @@ def __init__(self, cmd="prank", **kwargs): "Do not compute posterior support. Default: compute"), #-pwdist=# [expected pairwise distance for computing guidetree; #default: dna 0.25 / prot 0.5] - _Option(["-pwdist", "pwdist"], [], - lambda x: isinstance(x, float), - 0, + _Option(["-pwdist", "pwdist"], "Expected pairwise distance for computing guidetree. " - "Default: dna 0.25 / prot 0.5"), + "Default: dna 0.25 / prot 0.5", + checker_function=lambda x: isinstance(x, float)), _Switch(["-once", "once"], "Run only once. 
Default: twice if no guidetree given"), _Switch(["-twice", "twice"], @@ -137,44 +129,38 @@ def __init__(self, cmd="prank", **kwargs): "Output each node; mostly for debugging"), #-matresize=# [matrix resizing multiplier] # Doesnt specify type but Float and Int work - _Option(["-matresize", "matresize"], [], - lambda x: isinstance(x, float) or isinstance(x, - int), - 0, - "Matrix resizing multiplier"), + _Option(["-matresize", "matresize"], + "Matrix resizing multiplier", + checker_function=lambda x: isinstance(x, float) or \ + isinstance(x, int)), #-matinitsize=# [matrix initial size multiplier] # Doesnt specify type but Float and Int work - _Option(["-matinitsize", "matinitsize"], [], - lambda x: isinstance(x, float) or isinstance(x, - int), - 0, - "Matrix initial size multiplier"), + _Option(["-matinitsize", "matinitsize"], + "Matrix initial size multiplier", + checker_function=lambda x: isinstance(x, float) or \ + isinstance(x, int)), _Switch(["-longseq", "longseq"], "Save space in pairwise alignments"), _Switch(["-pwgenomic", "pwgenomic"], "Do pairwise alignment, no guidetree"), #-pwgenomicdist=# [distance for pairwise alignment; default: 0.3] - _Option(["-pwgenomicdist", "pwgenomicdist"], [], - lambda x: isinstance(x, float), - 0, - "Distance for pairwise alignment. Default: 0.3"), + _Option(["-pwgenomicdist", "pwgenomicdist"], + "Distance for pairwise alignment. Default: 0.3", + checker_function=lambda x: isinstance(x, float)), #-scalebranches=# [scale branch lengths; default: dna 1 / prot 2] - _Option(["-scalebranches", "scalebranches"], [], - lambda x: isinstance(x, int), - 0, - "Scale branch lengths. Default: dna 1 / prot 2"), + _Option(["-scalebranches", "scalebranches"], + "Scale branch lengths. 
Default: dna 1 / prot 2", + checker_function=lambda x: isinstance(x, int)), #-fixedbranches=# [use fixed branch lengths] #Assume looking for a float - _Option(["-fixedbranches", "fixedbranches"], [], - lambda x: isinstance(x, float), - 0, - "Use fixed branch lengths of input value"), + _Option(["-fixedbranches", "fixedbranches"], + "Use fixed branch lengths of input value", + checker_function=lambda x: isinstance(x, float)), #-maxbranches=# [set maximum branch length] #Assume looking for a float - _Option(["-maxbranches", "maxbranches"], [], - lambda x: isinstance(x, float), - 0, - "Use maximum branch lengths of input value"), + _Option(["-maxbranches", "maxbranches"], + "Use maximum branch lengths of input value", + checker_function=lambda x: isinstance(x, float)), #-realbranches [disable branch length truncation] _Switch(["-realbranches", "realbranches"], "Disable branch length truncation"), diff --git a/Bio/Align/Applications/_Probcons.py b/Bio/Align/Applications/_Probcons.py index fba6f3f28c9..0ba6999c88f 100644 --- a/Bio/Align/Applications/_Probcons.py +++ b/Bio/Align/Applications/_Probcons.py @@ -62,49 +62,46 @@ def __init__(self, cmd="probcons", **kwargs): #cmdline.pre = 3 _Switch(["-clustalw", "clustalw"], "Use CLUSTALW output format instead of MFA"), - _Option(["-c", "c", "--consistency", "consistency" ], [], - lambda x: x in range(0,6), - 0, + _Option(["-c", "c", "--consistency", "consistency" ], "Use 0 <= REPS <= 5 (default: 2) passes of consistency transformation", - 0), - _Option(["-ir", "--iterative-refinement", "iterative-refinement", "ir"], [], - lambda x: x in range(0,1001), - 0, - "Use 0 <= REPS <= 1000 (default: 100) passes of iterative-refinement", - 0), - _Option(["-pre", "--pre-training", "pre-training", "pre"], [], - lambda x: x in range(0,21), - 0, + checker_function=lambda x: x in range(0,6), + equate=False), + _Option(["-ir", "--iterative-refinement", "iterative-refinement", "ir"], + "Use 0 <= REPS <= 1000 (default: 100) passes of " + 
"iterative-refinement", + checker_function=lambda x: x in range(0,1001), + equate=False), + _Option(["-pre", "--pre-training", "pre-training", "pre"], "Use 0 <= REPS <= 20 (default: 0) rounds of pretraining", - 0), + checker_function=lambda x: x in range(0,21), + equate=False), _Switch(["-pairs", "pairs"], "Generate all-pairs pairwise alignments"), _Switch(["-viterbi", "viterbi"], - "Use Viterbi algorithm to generate all pairs (automatically enables -pairs)"), + "Use Viterbi algorithm to generate all pairs " + "(automatically enables -pairs)"), _Switch(["-verbose", "verbose"], "Report progress while aligning (default: off)"), - _Option(["-annot", "annot"], [], - None, - 0, + _Option(["-annot", "annot"], "Write annotation for multiple alignment to FILENAME", - 0), - _Option(["-t", "t", "--train", "train"], [], - None, - 0, - "Compute EM transition probabilities, store in FILENAME (default: no training)", - 0), + equate=False), + _Option(["-t", "t", "--train", "train"], + "Compute EM transition probabilities, store in FILENAME " + "(default: no training)", + equate=False), _Switch(["-e", "e", "--emissions", "emissions"], "Also reestimate emission probabilities (default: off)"), - _Option(["-p", "p", "--paramfile", "paramfile"], [], - None, - 0, + _Option(["-p", "p", "--paramfile", "paramfile"], "Read parameters from FILENAME", - 0), + equate=False), _Switch(["-a", "--alignment-order", "alignment-order", "a"], - "Print sequences in alignment order rather than input order (default: off)"), + "Print sequences in alignment order rather than input " + "order (default: off)"), #Input file name - _Argument(["input"], ["file"], None, 1, + _Argument(["input"], "Input file name. 
Must be multiple FASTA alignment "+ \ - "(MFA) format"), + "(MFA) format", + types=["file"], + is_required=True), ] AbstractCommandline.__init__(self, cmd, **kwargs) diff --git a/Bio/Align/Applications/_TCoffee.py b/Bio/Align/Applications/_TCoffee.py index f0c83f14ddb..f7d6308b053 100644 --- a/Bio/Align/Applications/_TCoffee.py +++ b/Bio/Align/Applications/_TCoffee.py @@ -41,70 +41,57 @@ class TCoffeeCommandline(AbstractCommandline): SEQ_TYPES = ["dna","protein","dna_protein"] def __init__(self, cmd="t_coffee", **kwargs): - self.parameters = \ - [_Option(["-output", "output"], [], - None, - 0, - """Specify the output type. + self.parameters = [ + _Option(["-output", "output"], + """Specify the output type. + One (or more separated by a comma) of: + 'clustalw_aln', 'clustalw', 'gcg', 'msf_aln', + 'pir_aln', 'fasta_aln', 'phylip', 'pir_seq', 'fasta_seq' - One (or more separated by a comma) of: - 'clustalw_aln', 'clustalw', 'gcg', 'msf_aln', - 'pir_aln', 'fasta_aln', 'phylip', 'pir_seq', 'fasta_seq' - - Note that of these Biopython's AlignIO module will only - read clustalw, pir, and fasta. - """, - 0), - _Option(["-infile", "infile"], ["file"], - None, - 1, - "Specify the input file.", - 0,), + Note that of these Biopython's AlignIO module will only + read clustalw, pir, and fasta. + """, #TODO - Can we read the PHYLIP output? + equate=False), + _Option(["-infile", "infile"], + "Specify the input file.", + types=["file"], + is_required=True, + equate=False), #Indicates the name of the alignment output by t_coffee. If the #default is used, the alignment is named .aln - _Option(["-outfile", "outfile"], ["file"], - None, - 0, - "Specify the output file. Default: .aln", - 0), + _Option(["-outfile", "outfile"], + "Specify the output file. 
Default: .aln", + types=["file"], + equate=False), _Switch(["-convert", "convert"], - "Specify you want to perform a file conversion"), - _Option(["-type", "type"], [], - lambda x: x in self.SEQ_TYPES, - 0, - "Specify the type of sequence being aligned", - 0), - _Option(["-outorder", "outorder"], [], - None, - 0, - "Specify the order of sequence to output" - "Either 'input', 'aligned' or of " - "Fasta file with sequence order", - 0), - _Option(["-matrix", "matrix"], [], - None, - 0, - "Specify the filename of the substitution matrix to use." - "Default: blosum62mt", - 0), - _Option(["-gapopen", "gapopen"], [], - lambda x: isinstance(x, int), - 0, - "Indicates the penalty applied for opening a gap " - "(negative integer)", - 0), - _Option(["-gapext", "gapext"], [], - lambda x: isinstance(x, int), - 0, - "Indicates the penalty applied for extending a " - "gap. (negative integer)", - 0), + "Specify you want to perform a file conversion"), + _Option(["-type", "type"], + "Specify the type of sequence being aligned", + checker_function=lambda x: x in self.SEQ_TYPES, + equate=False), + _Option(["-outorder", "outorder"], + "Specify the order of sequence to output" + "Either 'input', 'aligned' or of " + "Fasta file with sequence order", + equate=False), + _Option(["-matrix", "matrix"], + "Specify the filename of the substitution matrix to use." + "Default: blosum62mt", + equate=False), + _Option(["-gapopen", "gapopen"], + "Indicates the penalty applied for opening a gap " + "(negative integer)", + checker_function=lambda x: isinstance(x, int), + equate=False), + _Option(["-gapext", "gapext"], + "Indicates the penalty applied for extending a " + "gap. 
(negative integer)", + checker_function=lambda x: isinstance(x, int), + equate=False), _Switch(["-quiet", "quiet"], - "Turn off log output"), - _Option(["-mode", "mode"], [], - None, - 0, - "Specifies a special mode: genome, quickaln, dali, 3dcoffee", - 0) + "Turn off log output"), + _Option(["-mode", "mode"], + "Specifies a special mode: genome, quickaln, dali, 3dcoffee", + equate=False), ] AbstractCommandline.__init__(self, cmd, **kwargs) diff --git a/Bio/Application/__init__.py b/Bio/Application/__init__.py index 7b50f6c8fc0..92e98c78af5 100644 --- a/Bio/Application/__init__.py +++ b/Bio/Application/__init__.py @@ -465,6 +465,8 @@ class _Option(_AbstractParameter): is assumed to be a "human readable" name describing the option in one word. + o description -- a description of the option. + o param_types -- a list of string describing the type of parameter, which can help let programs know how to use it. The only supported values for this are the empty list (default), and ['file'] which means @@ -478,8 +480,6 @@ class _Option(_AbstractParameter): o equate -- should an equals sign be inserted if a value is used? - o description -- a description of the option. - o is_required -- a flag to indicate if the parameter must be set for the program to be run. 
@@ -487,9 +487,11 @@ class _Option(_AbstractParameter): o value -- the value of a parameter """ - def __init__(self, names = [], types = [], checker_function = None, - is_required = False, description = "", equate=True): + def __init__(self, names, description, types=[], checker_function=None, + is_required=False, equate=True): self.names = names + assert isinstance(description, basestring), \ + "%r for %s" % (description, names[-1]) assert types == [] or types == ["file"], \ "%r for %s" % (types, names[0]) self.param_types = types @@ -541,7 +543,7 @@ class _Switch(_AbstractParameter): NOTE - There is no value attribute, see is_set instead, """ - def __init__(self, names=[], description=""): + def __init__(self, names, description): self.names = names self.description = description self.is_set = False @@ -561,9 +563,11 @@ def __str__(self): class _Argument(_AbstractParameter): """Represent an argument on a commandline. """ - def __init__(self, names = [], types = [], checker_function = None, - is_required = False, description = ""): + def __init__(self, names, description, types=[], checker_function=None, + is_required=False): self.names = names + assert isinstance(description, basestring), \ + "%r for %s" % (description, names[-1]) assert types == [] or types == ["file"], \ "%r for %s" % (types, names[0]) self.param_types = types diff --git a/Bio/Blast/Applications.py b/Bio/Blast/Applications.py index 64d9a7e3bdd..be368c718ee 100644 --- a/Bio/Blast/Applications.py +++ b/Bio/Blast/Applications.py @@ -39,12 +39,15 @@ class FastacmdCommandline(AbstractCommandline): """ def __init__(self, cmd="fastacmd", **kwargs): - self.parameters = \ - [ - _Option(["-d", "database"], [], None, 1, - "The database to retrieve from."), - _Option(["-s", "search_string"], [], None, 1, - "The id to search for.") + self.parameters = [ + _Option(["-d", "database"], + "The database to retrieve from.", + is_required=True, + equate=False), + _Option(["-s", "search_string"], + "The id to 
search for.", + is_required=True, + equate=False) ] AbstractCommandline.__init__(self, cmd, **kwargs) @@ -57,52 +60,75 @@ class _BlastCommandLine(AbstractCommandline): """ def __init__(self, cmd=None, **kwargs): assert cmd is not None - extra_parameters = [\ + extra_parameters = [ _Switch(["--help", "help"], "Print USAGE, DESCRIPTION and ARGUMENTS description; ignore other arguments."), - _Option(["-d", "database"], [], None, 1, - "The database to BLAST against.", False), - _Option(["-i", "infile"], ["file"], None, 1, - "The sequence to search with.", False), - _Option(["-e", "expectation"], [], None, 0, - "Expectation value cutoff.", False), - _Option(["-m", "align_view"], [], None, 0, + _Option(["-d", "database"], + "The database to BLAST against.", + is_required=True, + equate=False), + _Option(["-i", "infile"], + "The sequence to search with.", + types=["file"], + is_required=True, + equate=False), + _Option(["-e", "expectation"], + "Expectation value cutoff.", + equate=False), + _Option(["-m", "align_view"], "Alignment view. Integer 0-11. Use 7 for XML output.", - False), - _Option(["-o", "align_outfile", "outfile"], ["file"], None, 0, - "Output file for alignment.", False), - _Option(["-y", "xdrop_extension"], [], None, 0, - "Dropoff for blast extensions.", False), - _Option(["-F", "filter"], [], None, 0, - "Filter query sequence with SEG? T/F", False), - _Option(["-X", "xdrop"], [], None, 0, - "Dropoff value (bits) for gapped alignments."), - _Option(["-I", "show_gi"], [], None, 0, - "Show GI's in deflines? T/F", False), - _Option(["-J", "believe_query"], [], None, 0, - "Believe the query defline? 
T/F", False), - _Option(["-Z", "xdrop_final"], [], None, 0, - "X dropoff for final gapped alignment.", False), - _Option(["-z", "db_length"], [], None, 0, - "Effective database length.", False), - _Option(["-O", "seqalign_file"], ["file"], None, 0, - "seqalign file to output.", False), - _Option(["-v", "descriptions"], [], None, 0, - "Number of one-line descriptions.", False), - _Option(["-b", "alignments"], [], None, 0, - "Number of alignments.", False), - _Option(["-Y", "search_length"], [], None, 0, - "Effective length of search space (use zero for the " + \ - "real size).", False), - _Option(["-T", "html"], [], None, 0, - "Produce HTML output? T/F", False), - _Option(["-U", "case_filter"], [], None, 0, - "Use lower case filtering of FASTA sequence? T/F", False), - - _Option(["-a", "nprocessors"], [], None, 0, - "Number of processors to use.", False), - _Option(["-g", "gapped"], [], None, 0, - "Whether to do a gapped alignment. T/F", False), + equate=False), + _Option(["-o", "align_outfile", "outfile"], + "Output file for alignment.", + types=["file"], + equate=False), + _Option(["-y", "xdrop_extension"], + "Dropoff for blast extensions.", + equate=False), + _Option(["-F", "filter"], + "Filter query sequence with SEG? T/F", + equate=False), + _Option(["-X", "xdrop"], + "Dropoff value (bits) for gapped alignments.", + equate=False), + _Option(["-I", "show_gi"], + "Show GI's in deflines? T/F", + equate=False), + _Option(["-J", "believe_query"], + "Believe the query defline? 
T/F", + equate=False), + _Option(["-Z", "xdrop_final"], + "X dropoff for final gapped alignment.", + equate=False), + _Option(["-z", "db_length"], + "Effective database length.", + equate=False), + _Option(["-O", "seqalign_file"], + "seqalign file to output.", + types=["file"], + equate=False), + _Option(["-v", "descriptions"], + "Number of one-line descriptions.", + equate=False), + _Option(["-b", "alignments"], + "Number of alignments.", + equate=False), + _Option(["-Y", "search_length"], + "Effective length of search space (use zero for the " + "real size).", + equate=False), + _Option(["-T", "html"], + "Produce HTML output? T/F", + equate=False), + _Option(["-U", "case_filter"], + "Use lower case filtering of FASTA sequence? T/F", + equate=False), + _Option(["-a", "nprocessors"], + "Number of processors to use.", + equate=False), + _Option(["-g", "gapped"], + "Whether to do a gapped alignment. T/F", + equate=False), ] try: #Insert extra parameters - at the start just in case there @@ -128,22 +154,29 @@ class _BlastAllOrPgpCommandLine(_BlastCommandLine): """ def __init__(self, cmd=None, **kwargs): assert cmd is not None - extra_parameters = [\ - _Option(["-G", "gap_open"], [], None, 0, - "Gap open penalty", False), - _Option(["-E", "gap_extend"], [], None, 0, - "Gap extension penalty", False), - _Option(["-A", "window_size"], [], None, 0, - "Multiple hits window size", False), - _Option(["-f", "hit_extend"], [], None, 0, - "Threshold for extending hits.", False), - _Option(["-K", "keep_hits"], [], None, 0, - " Number of best hits from a region to keep.", False), - _Option(["-W", "wordsize"], [], None, 0, - "Word size", False), - _Option(["-P", "passes"], [], None, 0, + extra_parameters = [ + _Option(["-G", "gap_open"], + "Gap open penalty", + equate=False), + _Option(["-E", "gap_extend"], + "Gap extension penalty", + equate=False), + _Option(["-A", "window_size"], + "Multiple hits window size", + equate=False), + _Option(["-f", "hit_extend"], + "Threshold for 
extending hits.", + equate=False), + _Option(["-K", "keep_hits"], + " Number of best hits from a region to keep.", + equate=False), + _Option(["-W", "wordsize"], + "Word size", + equate=False), + _Option(["-P", "passes"], "Hits/passes. Integer 0-2. 0 for multiple hit, " - "1 for single hit (does not apply to blastn)", False), + "1 for single hit (does not apply to blastn)", + equate=False), ] try: #Insert extra parameters - at the start just in case there @@ -180,52 +213,67 @@ class BlastallCommandline(_BlastAllOrPgpCommandLine): def __init__(self, cmd="blastall",**kwargs): import warnings warnings.warn("Like blastall, this wrapper is now obsolete, and will be deprecated and removed in a future release of Biopython.", PendingDeprecationWarning) - self.parameters = [ \ + self.parameters = [ #Sorted in the same order as the output from blastall --help #which should make it easier to keep them up to date in future. #Note that some arguments are defined the the base clases (above). - _Option(["-p", "program"], [], None, 1, - "The blast program to use (e.g. blastp, blastn).", False), - _Option(["-q", "nuc_mismatch"], [], None, 0, - "Penalty for a nucleotide mismatch (blastn only).", False), - _Option(["-r", "nuc_match"], [], None, 0, - "Reward for a nucleotide match (blastn only).", False), - _Option(["-Q", "query_genetic_code"], [], None, 0, - "Query Genetic code to use.", False), - _Option(["-D", "db_genetic_code"], [], None, 0, - "DB Genetic code (for tblast[nx] only).", False), - _Option(["-M", "matrix"], [], None, 0, - "Matrix to use", False), - _Option(["-S", "strands"], [], None, 0, - "Query strands to search against database (for blast[nx], " + \ - "and tblastx). 
3 is both, 1 is top, 2 is bottom.", False), - _Option(["-l", "restrict_gi"], [], None, 0, - "Restrict search of database to list of GI's.", False), - _Option(["-R", "checkpoint"], ["file"], None, 0, - "PSI-TBLASTN checkpoint input file.", False), - _Option(["-n", "megablast"], [], None, 0, - "MegaBlast search T/F.", False), + _Option(["-p", "program"], + "The blast program to use (e.g. blastp, blastn).", + is_required=True, + equate=False), + _Option(["-q", "nuc_mismatch"], + "Penalty for a nucleotide mismatch (blastn only).", + equate=False), + _Option(["-r", "nuc_match"], + "Reward for a nucleotide match (blastn only).", + equate=False), + _Option(["-Q", "query_genetic_code"], + "Query Genetic code to use.", + equate=False), + _Option(["-D", "db_genetic_code"], + "DB Genetic code (for tblast[nx] only).", + equate=False), + _Option(["-M", "matrix"], + "Matrix to use", + equate=False), + _Option(["-S", "strands"], + "Query strands to search against database (for blast[nx], " + "and tblastx). 3 is both, 1 is top, 2 is bottom.", + equate=False), + _Option(["-l", "restrict_gi"], + "Restrict search of database to list of GI's.", + equate=False), + _Option(["-R", "checkpoint"], + "PSI-TBLASTN checkpoint input file.", + types=["file"], + equate=False), + _Option(["-n", "megablast"], + "MegaBlast search T/F.", + equate=False), #The old name "region_length" is for consistency with our #old blastall function wrapper: - _Option(["-L", "region_length", "range_restriction"], [], - None, 0, + _Option(["-L", "region_length", "range_restriction"], """Location on query sequence (string format start,end). 
In older versions of BLAST, -L set the length of region - used to judge hits (see -K parameter).""", False), - _Option(["-w", "frame_shit_penalty"], [], None, 0, - "Frame shift penalty (OOF algorithm for blastx).", False), - _Option(["-t", "largest_intron"], [], None, 0, - "Length of the largest intron allowed in a translated " + \ - "nucleotide sequence when linking multiple distinct " + \ - "alignments. (0 invokes default behavior; a negative value " + \ - "disables linking.)", False), - _Option(["-B", "num_concatenated_queries"], [], None, 0, + used to judge hits (see -K parameter).""", + equate=False), + _Option(["-w", "frame_shit_penalty"], + "Frame shift penalty (OOF algorithm for blastx).", + equate=False), + _Option(["-t", "largest_intron"], + "Length of the largest intron allowed in a translated " + "nucleotide sequence when linking multiple distinct " + "alignments. (0 invokes default behavior; a negative value " + "disables linking.)", + equate=False), + _Option(["-B", "num_concatenated_queries"], "Number of concatenated queries, for blastn and tblastn.", - False), - _Option(["-V", "oldengine"], [], None, 0, - "Force use of the legacy BLAST engine.", False), - _Option(["-C", "composition_based"], [], None, 0, + equate=False), + _Option(["-V", "oldengine"], + "Force use of the legacy BLAST engine.", + equate=False), + _Option(["-C", "composition_based"], """Use composition-based statistics for tblastn: D or d: default (equivalent to F) 0 or F or f: no composition-based statistics @@ -235,10 +283,12 @@ def __init__(self, cmd="blastall",**kwargs): 3: Composition-based score adjustment as in Bioinformatics 21:902-911, 2005, unconditionally For programs other than tblastn, must either be absent or be - D, F or 0.""", False), - _Option(["-s", "smith_waterman"], [], None, 0, - "Compute locally optimal Smith-Waterman alignments (This " + \ - "option is only available for gapped tblastn.) 
T/F", False), + D, F or 0.""", + equate=False), + _Option(["-s", "smith_waterman"], + "Compute locally optimal Smith-Waterman alignments (This " + "option is only available for gapped tblastn.) T/F", + equate=False), ] _BlastAllOrPgpCommandLine.__init__(self, cmd, **kwargs) @@ -266,36 +316,56 @@ class BlastpgpCommandline(_BlastAllOrPgpCommandLine): def __init__(self, cmd="blastpgp",**kwargs): import warnings warnings.warn("Like blastpgp (and blastall), this wrapper is now obsolete, and will be deprecated and removed in a future release of Biopython.", PendingDeprecationWarning) - self.parameters = [ \ - _Option(["-C", "checkpoint_outfile"], ["file"], None, 0, - "Output file for PSI-BLAST checkpointing.", False), - _Option(["-R", "restart_infile"], ["file"], None, 0, - "Input file for PSI-BLAST restart.", False), - _Option(["-k", "hit_infile"], ["file"], None, 0, - "Hit file for PHI-BLAST.", False), - _Option(["-Q", "matrix_outfile"], ["file"], None, 0, - "Output file for PSI-BLAST matrix in ASCII.", False), - _Option(["-B", "align_infile"], ["file"], None, 0, - "Input alignment file for PSI-BLAST restart.", False), - _Option(["-S", "required_start"], [], None, 0, - "Start of required region in query.", False), - _Option(["-H", "required_end"], [], None, 0, - "End of required region in query.", False), - _Option(["-j", "npasses"], [], None, 0, - "Number of passes", False), - _Option(["-N", "nbits_gapping"], [], None, 0, - "Number of bits to trigger gapping.", False), - _Option(["-c", "pseudocounts"], [], None, 0, - "Pseudocounts constants for multiple passes.", False), - _Option(["-h", "model_threshold"], [], None, 0, - "E-value threshold to include in multipass model.", False), + self.parameters = [ + _Option(["-C", "checkpoint_outfile"], + "Output file for PSI-BLAST checkpointing.", + types=["file"], + equate=False), + _Option(["-R", "restart_infile"], + "Input file for PSI-BLAST restart.", + types=["file"], + equate=False), + _Option(["-k", "hit_infile"], + "Hit 
file for PHI-BLAST.", + types=["file"], + equate=False), + _Option(["-Q", "matrix_outfile"], + "Output file for PSI-BLAST matrix in ASCII.", + types=["file"], + equate=False), + _Option(["-B", "align_infile"], + "Input alignment file for PSI-BLAST restart.", + types=["file"], + equate=False), + _Option(["-S", "required_start"], + "Start of required region in query.", + equate=False), + _Option(["-H", "required_end"], + "End of required region in query.", + equate=False), + _Option(["-j", "npasses"], + "Number of passes", + equate=False), + _Option(["-N", "nbits_gapping"], + "Number of bits to trigger gapping.", + equate=False), + _Option(["-c", "pseudocounts"], + "Pseudocounts constants for multiple passes.", + equate=False), + _Option(["-h", "model_threshold"], + "E-value threshold to include in multipass model.", + equate=False), #Does the old name "region_length" for -L make sense? - _Option(["-L", "region_length"], [], None, 0, - "Cost to decline alignment (disabled when zero).", False), - _Option(["-M", "matrix"], [], None, 0, - "Matrix (string, default BLOSUM62).", False), - _Option(["-p", "program"], [], None, 1, - "The blast program to use (e.g blastpgp, patseedp or seedp).", False), + _Option(["-L", "region_length"], + "Cost to decline alignment (disabled when zero).", + equate=False), + _Option(["-M", "matrix"], + "Matrix (string, default BLOSUM62).", + equate=False), + _Option(["-p", "program"], + "The blast program to use (e.g blastpgp, patseedp or seedp).", + is_required=True, + equate=False), ] _BlastAllOrPgpCommandLine.__init__(self, cmd, **kwargs) @@ -325,21 +395,26 @@ class RpsBlastCommandline(_BlastCommandLine): def __init__(self, cmd="rpsblast",**kwargs): import warnings warnings.warn("Like the old rpsblast (and blastall), this wrapper is now obsolete, and will be deprecated and removed in a future release of Biopython.", PendingDeprecationWarning) - self.parameters = [ \ + self.parameters = [ #Note -N is also in blastpgp, but not blastall - 
_Option(["-N", "nbits_gapping"], [], None, 0, - "Number of bits to trigger gapping.", False), + _Option(["-N", "nbits_gapping"], + "Number of bits to trigger gapping.", + equate=False), #Note blastall and blastpgp wrappers have -P with name "passes". #If this is the same thing, we should be consistent! - _Option(["-P", "multihit"], [], None, 0, - "0 for multiple hit, 1 for single hit", False), - _Option(["-l", "logfile"], ["file"], None, 0, - "Logfile name.", False), - _Option(["-p", "protein"], [], None, 0, - "Query sequence is protein. T/F", False), - _Option(["-L", "range_restriction"], [], None, 0, + _Option(["-P", "multihit"], + "0 for multiple hit, 1 for single hit", + equate=False), + _Option(["-l", "logfile"], + "Logfile name.", + types=["file"], + equate=False), + _Option(["-p", "protein"], + "Query sequence is protein. T/F", + equate=False), + _Option(["-L", "range_restriction"], "Location on query sequence (string format start,end).", - False), + equate=False), ] _BlastCommandLine.__init__(self, cmd, **kwargs) @@ -356,33 +431,40 @@ class _NcbibaseblastCommandline(AbstractCommandline): """ def __init__(self, cmd=None, **kwargs): assert cmd is not None - extra_parameters = [ \ + extra_parameters = [ #Core: _Switch(["-h", "h"], "Print USAGE and DESCRIPTION; ignore other arguments."), _Switch(["-help", "help"], - "Print USAGE, DESCRIPTION and ARGUMENTS description; ignore other arguments."), + "Print USAGE, DESCRIPTION and ARGUMENTS description; " + "ignore other arguments."), _Switch(["-version", "version"], "Print version number; ignore other arguments."), # Output configuration options - _Option(["-out", "out"], ["file"], None, 0, - "Output file for alignment.", False), + _Option(["-out", "out"], + "Output file for alignment.", + types=["file"], + equate=False), #Formatting options: - _Option(["-outfmt", "outfmt"], [], None, 0, - "Alignment view. Integer 0-11. 
Use 5 for XML output (differs from classic BLAST which used 7 for XML).", - False), #TODO - Document and test the column options + _Option(["-outfmt", "outfmt"], + "Alignment view. Integer 0-11. Use 5 for XML output " + "(differs from classic BLAST which used 7 for XML).", + equate=False), + #TODO - Document and test the column options _Switch(["-show_gis","show_gis"], "Show NCBI GIs in deflines?"), - _Option(["-num_descriptions","num_descriptions"], [], None, 0, + _Option(["-num_descriptions","num_descriptions"], """Number of database sequences to show one-line descriptions for. Integer argument (at least zero). Default is 500. - See also num_alignments.""", False), - _Option(["-num_alignments","num_alignments"], [], None, 0, + See also num_alignments.""", + equate=False), + _Option(["-num_alignments","num_alignments"], """Number of database sequences to show num_alignments for. Integer argument (at least zero). Default is 200. - See also num_alignments.""", False), + See also num_alignments.""", + equate=False), _Switch(["-html", "html"], "Produce HTML output? See also the outfmt option."), #Miscellaneous options @@ -416,21 +498,27 @@ class _NcbiblastCommandline(_NcbibaseblastCommandline): """ def __init__(self, cmd=None, **kwargs): assert cmd is not None - extra_parameters = [ \ + extra_parameters = [ #Input query options: - _Option(["-query", "query"], ["file"], None, 0, - "The sequence to search with.", False), #Should this be required? - _Option(["-query_loc", "query_loc"], [], None, 0, - "Location on the query sequence (Format: start-stop)", False), + _Option(["-query", "query"], + "The sequence to search with.", + types=["file"], + equate=False), #Should this be required? + _Option(["-query_loc", "query_loc"], + "Location on the query sequence (Format: start-stop)", + equate=False), #General search options: - _Option(["-db", "db"], [], None, 0, - "The database to BLAST against.", False), #Should this be required? 
- _Option(["-evalue", "evalue"], [], None, 0, - "Expectation value cutoff.", False), - _Option(["-word_size","word_size"], [], None, 0, + _Option(["-db", "db"], + "The database to BLAST against.", + equate=False), + _Option(["-evalue", "evalue"], + "Expectation value cutoff.", + equate=False), + _Option(["-word_size","word_size"], """Word size for wordfinder algorithm. - Integer. Minimum 2.""", False), + Integer. Minimum 2.""", + equate=False), #BLAST-2-Sequences options: # - see subclass #Formatting options: @@ -442,62 +530,72 @@ def __init__(self, cmd=None, **kwargs): _Switch(["-lcase_masking", "lcase_masking"], "Use lower case filtering in query and subject sequence(s)?"), #Restrict search or results - _Option(["-gilist", "gilist"], ["file"], None, 0, + _Option(["-gilist", "gilist"], """Restrict search of database to list of GI's. Incompatible with: negative_gilist, seqidlist, remote, subject, subject_loc""", - False), - _Option(["-negative_gilist", "negative_gilist"], ["file"], None, 0, + types=["file"], + equate=False), + _Option(["-negative_gilist", "negative_gilist"], """Restrict search of database to everything except the listed GIs. Incompatible with: gilist, seqidlist, remote, subject, subject_loc""", - False), - _Option(["-seqidlist", "seqidlist"], ["file"], None, 0, + types=["file"], + equate=False), + _Option(["-seqidlist", "seqidlist"], """Restrict search of database to list of SeqID's. Incompatible with: gilist, negative_gilist, remote, subject, subject_loc""", - False), - _Option(["-entrez_query", "entrez_query"], [], None, 0, - "Restrict search with the given Entrez query (requires remote).", False), - _Option(["-max_target_seqs", "max_target_seqs"], [], None, 0, + types=["file"], + equate=False), + _Option(["-entrez_query", "entrez_query"], + "Restrict search with the given Entrez query (requires remote).", + equate=False), + _Option(["-max_target_seqs", "max_target_seqs"], """Maximum number of aligned sequences to keep. 
- Integer argument (at least one).""", False), + Integer argument (at least one).""", + equate=False), #Statistical options - _Option(["-dbsize", "dbsize"], [], None, 0, - "Effective length of the database (integer)", False), - _Option(["-searchsp", "searchsp"], [], None, 0, - "Effective length of the search space (integer)", False), + _Option(["-dbsize", "dbsize"], + "Effective length of the database (integer)", + equate=False), + _Option(["-searchsp", "searchsp"], + "Effective length of the search space (integer)", + equate=False), #Extension options - _Option(["-xdrop_ungap", "xdrop_ungap"], [], None, 0, + _Option(["-xdrop_ungap", "xdrop_ungap"], "X-dropoff value (in bits) for ungapped extensions. Float.", - False), - _Option(["-xdrop_gap", "xdrop_gap"], [], None, 0, + equate=False), + _Option(["-xdrop_gap", "xdrop_gap"], "X-dropoff value (in bits) for preliminary gapped extensions. Float.", - False), - _Option(["-xdrop_gap_final", "xdrop_gap_final"], [], None, 0, + equate=False), + _Option(["-xdrop_gap_final", "xdrop_gap_final"], "X-dropoff value (in bits) for final gapped alignment. Float.", - False), - _Option(["-window_size", "window_size"], [], None, 0, + equate=False), + _Option(["-window_size", "window_size"], "Multiple hits window size, use 0 to specify 1-hit algorithm. Integer.", - False), + equate=False), # Search strategy options _Option(["-import_search_strategy", "import_search_strategy"], - ["file"], None, 0, """Search strategy to use. - Incompatible with: export_search_strategy""", False), + Incompatible with: export_search_strategy""", + types=["file"], + equate=False), _Option(["-export_search_strategy", "export_search_strategy"], - ["file"], None, 0, """File name to record the search strategy used. 
- Incompatible with: import_search_strategy""", False), + Incompatible with: import_search_strategy""", + types=["file"], + equate=False), #Miscellaneous options - _Option(["-num_threads", "num_threads"], [], None, 0, + _Option(["-num_threads", "num_threads"], """Number of threads to use in the BLAST search. Integer of at least one. Default is one. - Incompatible with: remote""", False), + Incompatible with: remote""", + equate=False), _Switch(["-remote", "remote"], """Execute search remotely? @@ -532,43 +630,53 @@ class _Ncbiblast2SeqCommandline(_NcbiblastCommandline): """ def __init__(self, cmd=None, **kwargs): assert cmd is not None - extra_parameters = [ \ + extra_parameters = [ #General search options: - _Option(["-gapopen", "gapopen"], [], None, 0, - "Cost to open a gap (integer).", False), - _Option(["-gapextend", "gapextend"], [], None, 0, - "Cost to extend a gap (integer).", False), + _Option(["-gapopen", "gapopen"], + "Cost to open a gap (integer).", + equate=False), + _Option(["-gapextend", "gapextend"], + "Cost to extend a gap (integer).", + equate=False), #BLAST-2-Sequences options: - _Option(["-subject", "subject"], ["file"], None, 0, + _Option(["-subject", "subject"], """Subject sequence(s) to search. Incompatible with: db, gilist, negative_gilist. - See also subject_loc.""", False), - _Option(["-subject_loc", "subject_loc"], [], None, 0, + See also subject_loc.""", + types=["file"], + equate=False), + _Option(["-subject_loc", "subject_loc"], """Location on the subject sequence (Format: start-stop) Incompatible with: db, gilist, negative_gilist, remote. - See also subject.""", False), + See also subject.""", + equate=False), #Restrict search or results: - _Option(["-culling_limit", "culling_limit"], [], None, 0, + _Option(["-culling_limit", "culling_limit"], """Hit culling limit (integer). - If the query range of a hit is enveloped by that of at least this many - higher-scoring hits, delete the hit. 
+ If the query range of a hit is enveloped by that of at + least this many higher-scoring hits, delete the hit. - Incompatible with: best_hit_overhang, best_hit_score_edge.""", False), - _Option(["-best_hit_overhang", "best_hit_overhang"], [], None, 0, + Incompatible with: best_hit_overhang, best_hit_score_edge. + """, + equate=False), + _Option(["-best_hit_overhang", "best_hit_overhang"], """Best Hit algorithm overhang value (recommended value: 0.1) Float between 0.0 and 0.5 inclusive. - Incompatible with: culling_limit.""", False), - _Option(["-best_hit_score_edge", "best_hit_score_edge"], [], None, 0, + Incompatible with: culling_limit.""", + equate=False), + _Option(["-best_hit_score_edge", "best_hit_score_edge"], """Best Hit algorithm score edge value (recommended value: 0.1) Float between 0.0 and 0.5 inclusive. - Incompatible with: culling_limit.""", False), ] + Incompatible with: culling_limit.""", + equate=False), + ] try: #Insert extra parameters - at the start just in case there #are any arguments which must come last: @@ -606,37 +714,44 @@ class NcbiblastpCommandline(_Ncbiblast2SeqCommandline): subprocess module, as described in the Biopython tutorial. 
""" def __init__(self, cmd="blastp", **kwargs): - self.parameters = [ \ + self.parameters = [ #General search options: - _Option(["-task", "task"], [], - lambda value : value in ["blastp", "blastp-short"], 0, - "Task to execute (string, blastp (default) or blastp-short).", False), - _Option(["-matrix", "matrix"], [], None, 0, - "Scoring matrix name (default BLOSUM62).", False), - _Option(["-threshold", "threshold"], [], None, 0, - "Minimum word score such that the word is added to the BLAST lookup table (float)", False), - _Option(["-comp_based_stats", "comp_based_stats"], [], - lambda value : value in "0Ft2TtDd", 0, + _Option(["-task", "task"], + "Task to execute (string, blastp (default) or blastp-short).", + checker_function=lambda value : value in ["blastp", + "blastp-short"], + equate=False), + _Option(["-matrix", "matrix"], + "Scoring matrix name (default BLOSUM62).", equate=False), + _Option(["-threshold", "threshold"], + "Minimum word score such that the word is added to the " + "BLAST lookup table (float)", + equate=False), + _Option(["-comp_based_stats", "comp_based_stats"], """Use composition-based statistics (string, default 2, i.e. True). 0, F or f: no composition-based statistics 2, T or t, D or d : Composition-based score adjustment as in Bioinformatics 21:902-911, 2005, conditioned on sequence properties - Note that tblastn also supports values of 1 and 3.""", False), + Note that tblastn also supports values of 1 and 3.""", + checker_function=lambda value : value in "0Ft2TtDd", + equate=False), #Query filtering options: - _Option(["-seg", "seg"], [], None, 0, + _Option(["-seg", "seg"], """Filter query sequence with SEG (string). Format: "yes", "window locut hicut", or "no" to disable. - Default is "12 2.2 2.5""", False), + Default is "12 2.2 2.5""", + equate=False), #Restrict search or results: - _Option(["-db_soft_mask", "db_soft_mask"], [], None, 0, + _Option(["-db_soft_mask", "db_soft_mask"], """Filtering algorithm for soft masking (integer).
Filtering algorithm ID to apply to the BLAST database as soft masking. - Incompatible with: subject, subject_loc""", False), + Incompatible with: subject, subject_loc""", + equate=False), #Extension options: _Switch(["-ungapped", "ungapped"], "Perform ungapped alignment only?"), @@ -675,80 +790,102 @@ class NcbiblastnCommandline(_Ncbiblast2SeqCommandline): subprocess module, as described in the Biopython tutorial. """ def __init__(self, cmd="blastn", **kwargs): - self.parameters = [ \ + self.parameters = [ #Input query options: - _Option(["-strand", "strand"], [], - lambda value : value in ["both", "minus", "plus"],0, + _Option(["-strand", "strand"], """Query strand(s) to search against database/subject. - Values allowed are "both" (default), "minus", "plus".""", False), + Values allowed are "both" (default), "minus", "plus".""", + checker_function=lambda value : value in ["both", + "minus", + "plus"], + equate=False), #General search options: - _Option(["-task", "task"], [], - lambda value : value in ['blastn', 'blastn-short', 'dc-megablast', - 'megablast', 'vecscreen'], 0, + _Option(["-task", "task"], """Task to execute (string, default 'megablast') Allowed values 'blastn', 'blastn-short', 'dc-megablast', 'megablast' - (the default), or 'vecscreen'.""", False), - _Option(["-penalty", "penalty"], [], None, 0, - "Penalty for a nucleotide mismatch (integer, at most zero).", False), - _Option(["-reward", "reward"], [], None, 0, - "Reward for a nucleotide match (integer, at least zero).", False), + (the default), or 'vecscreen'.""", + checker_function=lambda value : value in ['blastn', + 'blastn-short', + 'dc-megablast', + 'megablast', + 'vecscreen'], + equate=False), + _Option(["-penalty", "penalty"], + "Penalty for a nucleotide mismatch (integer, at most zero).", + equate=False), + _Option(["-reward", "reward"], + "Reward for a nucleotide match (integer, at least zero).", + equate=False), #TODO - Does this need an argument or is it a switch? 
- #_Option(["-use_index", "use_index"], [], None, 0, - # "Use MegaBLAST database index (boolean).", False), - _Option(["-index_name", "index_name"], [], None, 0, - "MegaBLAST database index name.", False), + #_Option(["-use_index", "use_index"], + # "Use MegaBLAST database index (boolean).", + # equate=False), + _Option(["-index_name", "index_name"], + "MegaBLAST database index name.", + equate=False), #Query filtering options: - _Option(["-dust", "dust"], [], None, 0, + _Option(["-dust", "dust"], """Filter query sequence with DUST (string). Format: 'yes', 'level window linker', or 'no' to disable. Default = '20 64 1'. - """, False), - _Option(["-filtering_db", "filtering_db"], [], None, 0, - "BLAST database containing filtering elements (i.e. repeats).", False), - _Option(["-window_masker_taxid", "window_masker_taxid"], [], None, 0, - "Enable WindowMasker filtering using a Taxonomic ID (integer).", False), - _Option(["-window_masker_db", "window_masker_db"], [], None, 0, - "Enable WindowMasker filtering using this repeats database (string).", False), + """, + equate=False), + _Option(["-filtering_db", "filtering_db"], + "BLAST database containing filtering elements (i.e. repeats).", + equate=False), + _Option(["-window_masker_taxid", "window_masker_taxid"], + "Enable WindowMasker filtering using a Taxonomic ID (integer).", + equate=False), + _Option(["-window_masker_db", "window_masker_db"], + "Enable WindowMasker filtering using this repeats database (string).", + equate=False), #Restrict search or results: - _Option(["-db_soft_mask", "db_soft_mask"], [], None, 0, + _Option(["-db_soft_mask", "db_soft_mask"], """Filtering algorithm for soft masking (integer). Filtering algorithm ID to apply to the BLAST database as soft masking. 
- Incompatible with: subject, subject_loc""", False), - _Option(["-perc_identity", "perc_identity"], [], None, 0, - "Percent identity (real, 0 to 100 inclusive).", False), + Incompatible with: subject, subject_loc""", + equate=False), + _Option(["-perc_identity", "perc_identity"], + "Percent identity (real, 0 to 100 inclusive).", + equate=False), #Discontiguous MegaBLAST options - _Option(["-template_type", "template_type"], [], - lambda value : value in ['coding', 'coding_and_optimal','optimal'], 0, + _Option(["-template_type", "template_type"], """Discontiguous MegaBLAST template type (string). Allowed values: 'coding', 'coding_and_optimal' or 'optimal' - Requires: template_length.""", False), - _Option(["-template_length", "template_length"], [], - lambda value : value in [16,18,21,'16','18','21'], 0, + Requires: template_length.""", + checker_function=lambda value : value in ['coding', 'coding_and_optimal','optimal'], + equate=False), + _Option(["-template_length", "template_length"], """Discontiguous MegaBLAST template length (integer). Allowed values: 16, 18, 21 - Requires: template_type.""", False), + Requires: template_type.""", + checker_function=lambda value : value in [16,18,21,'16','18','21'], + equate=False), #Extension options: _Switch(["-no_greedy", "no_greedy"], "Use non-greedy dynamic programming extension"), - _Option(["-min_raw_gapped_score", "min_raw_gapped_score"], [], None, 0, - "Minimum raw gapped score to keep an alignment in the preliminary gapped and traceback stages (integer).", False), + _Option(["-min_raw_gapped_score", "min_raw_gapped_score"], + "Minimum raw gapped score to keep an alignment in the " + "preliminary gapped and traceback stages (integer).", + equate=False), _Switch(["-ungapped", "ungapped"], "Perform ungapped alignment only?"), - _Option(["-off_diagonal_range", "off_diagonal_range"], [], None, 0, + _Option(["-off_diagonal_range", "off_diagonal_range"], """Number of off-diagonals to search for the 2nd hit (integer). 
Expects a positive integer, or 0 (default) to turn off. Added in BLAST 2.2.23+ - """, False), + """, + equate=False), ] _Ncbiblast2SeqCommandline.__init__(self, cmd, **kwargs) @@ -779,44 +916,53 @@ class NcbiblastxCommandline(_Ncbiblast2SeqCommandline): subprocess module, as described in the Biopython tutorial. """ def __init__(self, cmd="blastx", **kwargs): - self.parameters = [ \ + self.parameters = [ #Input query options: - _Option(["-strand", "strand"], [], - lambda value : value in ["both", "minus", "plus"],0, + _Option(["-strand", "strand"], """Query strand(s) to search against database/subject. - Values allowed are "both" (default), "minus", "plus".""", False), + Values allowed are "both" (default), "minus", "plus".""", + checker_function=lambda value : value in ["both", "minus", "plus"], + equate=False), #Input query options: - _Option(["-query_gencode", "query_gencode"], [], None, 0, + _Option(["-query_gencode", "query_gencode"], """Genetic code to use to translate query - Integer. Default is one.""", False), + Integer. Default is one.""", + equate=False), #General search options: - _Option(["-frame_shift_penalty", "frame_shift_penalty"], [], None, 0, - "Frame shift penalty (integer, at least 1, default ignored).", False), - _Option(["-max_intron_length", "max_intron_length"], [], None, 0, + _Option(["-frame_shift_penalty", "frame_shift_penalty"], + "Frame shift penalty (integer, at least 1, default ignored).", + equate=False), + _Option(["-max_intron_length", "max_intron_length"], """Maximum intron length (integer). Length of the largest intron allowed in a translated nucleotide sequence when linking multiple distinct alignments (a negative - value disables linking). 
Default zero.""", False), - _Option(["-matrix", "matrix"], [], None, 0, - "Scoring matrix name (default BLOSUM62).", False), - _Option(["-threshold", "threshold"], [], None, 0, - "Minimum word score such that the word is added to the BLAST lookup table (float)", False), + value disables linking). Default zero.""", + equate=False), + _Option(["-matrix", "matrix"], + "Scoring matrix name (default BLOSUM62).", + equate=False), + _Option(["-threshold", "threshold"], + "Minimum word score such that the word is added to the " + "BLAST lookup table (float)", + equate=False), #Query filtering options: - _Option(["-seg", "seg"], [], None, 0, + _Option(["-seg", "seg"], """Filter query sequence with SEG (string). Format: "yes", "window locut hicut", or "no" to disable. - Default is "12 2.2 2.5""", False), + Default is "12 2.2 2.5""", + equate=False), #Restrict search or results: - _Option(["-db_soft_mask", "db_soft_mask"], [], None, 0, + _Option(["-db_soft_mask", "db_soft_mask"], """Filtering algorithm for soft masking (integer). Filtering algorithm ID to apply to the BLAST database as soft masking. - Incompatible with: subject, subject_loc""", False), + Incompatible with: subject, subject_loc""", + equate=False), #Extension options: _Switch(["-ungapped", "ungapped"], "Perform ungapped alignment only?"), @@ -847,26 +993,30 @@ class NcbitblastnCommandline(_Ncbiblast2SeqCommandline): subprocess module, as described in the Biopython tutorial. """ def __init__(self, cmd="tblastn", **kwargs): - self.parameters = [ \ + self.parameters = [ #General search options: - _Option(["-db_gencode", "db_gencode"], [], None, 0, + _Option(["-db_gencode", "db_gencode"], """Genetic code to use to translate query - Integer. Default is one.""", False), - _Option(["-frame_shift_penalty", "frame_shift_penalty"], [], None, 0, - "Frame shift penalty (integer, at least 1, default ignored).", False), - _Option(["-max_intron_length", "max_intron_length"], [], None, 0, + Integer. 
Default is one.""", + equate=False), + _Option(["-frame_shift_penalty", "frame_shift_penalty"], + "Frame shift penalty (integer, at least 1, default ignored).", + equate=False), + _Option(["-max_intron_length", "max_intron_length"], """Maximum intron length (integer). Length of the largest intron allowed in a translated nucleotide sequence when linking multiple distinct alignments (a negative - value disables linking). Default zero.""", False), - _Option(["-matrix", "matrix"], [], None, 0, - "Scoring matrix name (default BLOSUM62).", False), - _Option(["-threshold", "threshold"], [], None, 0, - "Minimum word score such that the word is added to the BLAST lookup table (float)", False), - _Option(["-comp_based_stats", "comp_based_stats"], [], - lambda value : value in "0Ft12TtDd3", 0, + value disables linking). Default zero.""", + equate=False), + _Option(["-matrix", "matrix"], + "Scoring matrix name (default BLOSUM62).", + equate=False), + _Option(["-threshold", "threshold"], + "Minimum word score such that the word is added to the BLAST lookup table (float)", + equate=False), + _Option(["-comp_based_stats", "comp_based_stats"], """Use composition-based statistics (string, default 2, i.e. True). 0, F or f: no composition-based statistics @@ -876,19 +1026,23 @@ def __init__(self, cmd="tblastn", **kwargs): 3: Composition-based score adjustment as in Bioinformatics 21:902-911, 2005, unconditionally - Note that only tblastn supports values of 1 and 3.""", False), + Note that only tblastn supports values of 1 and 3.""", + checker_function=lambda value : value in "0Ft12TtDd3", + equate=False), #Query filtering options: - _Option(["-seg", "seg"], [], None, 0, + _Option(["-seg", "seg"], """Filter query sequence with SEG (string). Format: "yes", "window locut hicut", or "no" to disable. 
- Default is "12 2.2 2.5""", False), + Default is "12 2.2 2.5""", + equate=False), #Restrict search or results: - _Option(["-db_soft_mask", "db_soft_mask"], [], None, 0, + _Option(["-db_soft_mask", "db_soft_mask"], """Filtering algorithm ID to apply to the BLAST database as soft masking (string). Incompatible with: subject, subject_loc - """, False), + """, + equate=False), #Extension options: _Switch(["-ungapped", "ungapped"], "Perform ungapped alignment only?"), @@ -896,10 +1050,12 @@ def __init__(self, cmd="tblastn", **kwargs): _Switch(["-use_sw_tback", "use_sw_tback"], "Compute locally optimal Smith-Waterman alignments?"), #PSI-TBLASTN options: - _Option(["-in_pssm", "in_pssm"], ["file"], None, 0, + _Option(["-in_pssm", "in_pssm"], """PSI-BLAST checkpoint file - Incompatible with: remote, query""", False), + Incompatible with: remote, query""", + types=["file"], + equate=False), ] _Ncbiblast2SeqCommandline.__init__(self, cmd, **kwargs) @@ -927,45 +1083,54 @@ class NcbitblastxCommandline(_Ncbiblast2SeqCommandline): subprocess module, as described in the Biopython tutorial. """ def __init__(self, cmd="tblastx", **kwargs): - self.parameters = [ \ + self.parameters = [ #Input query options: - _Option(["-strand", "strand"], [], - lambda value : value in ["both", "minus", "plus"],0, + _Option(["-strand", "strand"], """Query strand(s) to search against database/subject. - Values allowed are "both" (default), "minus", "plus".""", False), + Values allowed are "both" (default), "minus", "plus".""", + checker_function=lambda value : value in ["both", "minus", "plus"], + equate=False), #Input query options: - _Option(["-query_gencode", "query_gencode"], [], None, 0, + _Option(["-query_gencode", "query_gencode"], """Genetic code to use to translate query - Integer. Default is one.""", False), + Integer. 
Default is one.""", + equate=False), #General search options: - _Option(["-db_gencode", "db_gencode"], [], None, 0, + _Option(["-db_gencode", "db_gencode"], """Genetic code to use to translate query - Integer. Default is one.""", False), - _Option(["-max_intron_length", "max_intron_length"], [], None, 0, + Integer. Default is one.""", + equate=False), + _Option(["-max_intron_length", "max_intron_length"], """Maximum intron length (integer). Length of the largest intron allowed in a translated nucleotide sequence when linking multiple distinct alignments (a negative - value disables linking). Default zero.""", False), - _Option(["-matrix", "matrix"], [], None, 0, - "Scoring matrix name (default BLOSUM62).", False), - _Option(["-threshold", "threshold"], [], None, 0, - "Minimum word score such that the word is added to the BLAST lookup table (float)", False), + value disables linking). Default zero.""", + equate=False), + _Option(["-matrix", "matrix"], + "Scoring matrix name (default BLOSUM62).", + equate=False), + _Option(["-threshold", "threshold"], + "Minimum word score such that the word is added to the " + "BLAST lookup table (float)", + equate=False), #Query filtering options: - _Option(["-seg", "seg"], [], None, 0, + _Option(["-seg", "seg"], """Filter query sequence with SEG (string). Format: "yes", "window locut hicut", or "no" to disable. - Default is "12 2.2 2.5""", False), + Default is "12 2.2 2.5""", + equate=False), #Restrict search or results: - _Option(["-db_soft_mask", "db_soft_mask"], [], None, 0, + _Option(["-db_soft_mask", "db_soft_mask"], """Filtering algorithm ID to apply to the BLAST database as soft masking (string). Incompatible with: subject, subject_loc - """, False), + """, + equate=False), ] _Ncbiblast2SeqCommandline.__init__(self, cmd, **kwargs) @@ -988,65 +1153,86 @@ class NcbipsiblastCommandline(_Ncbiblast2SeqCommandline): subprocess module, as described in the Biopython tutorial. 
""" def __init__(self, cmd="psiblast", **kwargs): - self.parameters = [ \ + self.parameters = [ #General search options: - _Option(["-matrix", "matrix"], [], None, 0, - "Scoring matrix name (default BLOSUM62).", False), - _Option(["-threshold", "threshold"], [], None, 0, - "Minimum word score such that the word is added to the BLAST lookup table (float)", False), - _Option(["-comp_based_stats", "comp_based_stats"], [], - lambda value : value in "0Ft2TtDd", 0, + _Option(["-matrix", "matrix"], + "Scoring matrix name (default BLOSUM62).", + equate=False), + _Option(["-threshold", "threshold"], + "Minimum word score such that the word is added to the " + "BLAST lookup table (float)", + equate=False), + _Option(["-comp_based_stats", "comp_based_stats"], """Use composition-based statistics (string, default 2, i.e. True). 0, F or f: no composition-based statistics - 2, T or t, D or d : Composition-based score adjustment as in - Bioinformatics 21:902-911, 2005, conditioned on sequence properties + 2, T or t, D or d : Composition-based score adjustment + as in Bioinformatics 21:902-911, 2005, conditioned on + sequence properties - Note that tblastn also supports values of 1 and 3.""", False), + Note that tblastn also supports values of 1 and 3.""", + checker_function=lambda value : value in "0Ft2TtDd", + equate=False), #Query filtering options: - _Option(["-seg", "seg"], [], None, 0, + _Option(["-seg", "seg"], """Filter query sequence with SEG (string). Format: "yes", "window locut hicut", or "no" to disable. 
- Default is "12 2.2 2.5""", False), + Default is "12 2.2 2.5""", + equate=False), #Extension options: - _Option(["-gap_trigger", "gap_trigger"], [], None, 0, - "Number of bits to trigger gapping (float, default 22)", False), + _Option(["-gap_trigger", "gap_trigger"], + "Number of bits to trigger gapping (float, default 22)", + equate=False), #Miscellaneous options: _Switch(["-use_sw_tback", "use_sw_tback"], "Compute locally optimal Smith-Waterman alignments?"), #PSI-BLAST options: - _Option(["-num_iterations", "num_iterations"], [], None, 0, + _Option(["-num_iterations", "num_iterations"], """Number of iterations to perform, integer Integer of at least one. Default is one. - Incompatible with: remote""", False), - _Option(["-out_pssm", "out_pssm"], ["file"], None, 0, - "File name to store checkpoint file", False), - _Option(["-out_ascii_pssm", "out_ascii_pssm"], ["file"], None, 0, - "File name to store ASCII version of PSSM", False), - _Option(["-in_msa", "in_msa"], ["file"], None, 0, - """File name of multiple sequence alignment to restart PSI-BLAST - - Incompatible with: in_pssm, query""", False), - _Option(["-in_pssm", "in_pssm"], ["file"], None, 0, + Incompatible with: remote""", + equate=False), + _Option(["-out_pssm", "out_pssm"], + "File name to store checkpoint file", + types=["file"], + equate=False), + _Option(["-out_ascii_pssm", "out_ascii_pssm"], + "File name to store ASCII version of PSSM", + types=["file"], + equate=False), + _Option(["-in_msa", "in_msa"], + """File name of multiple sequence alignment to restart + PSI-BLAST + + Incompatible with: in_pssm, query""", + types=["file"], + equate=False), + _Option(["-in_pssm", "in_pssm"], """PSI-BLAST checkpoint file - Incompatible with: in_msa, query, phi_pattern""", False), + Incompatible with: in_msa, query, phi_pattern""", + types=["file"], + equate=False), #PSSM engine options: - _Option(["-pseudocount", "pseudocount"], [], None, 0, + _Option(["-pseudocount", "pseudocount"], """Pseudo-count value 
used when constructing PSSM - Integer. Default is zero.""", False), - _Option(["-inclusion_ethresh", "inclusion_ethresh"], [], None, 0, + Integer. Default is zero.""", + equate=False), + _Option(["-inclusion_ethresh", "inclusion_ethresh"], """E-value inclusion threshold for pairwise alignments - Float. Default is 0.002.""", False), + Float. Default is 0.002.""", + equate=False), #PHI-BLAST options: - _Option(["-phi_pattern", "phi_pattern"], ["file"], None, 0, + _Option(["-phi_pattern", "phi_pattern"], """File name containing pattern to search - Incompatible with: in_pssm""", False), + Incompatible with: in_pssm""", + types=["file"], + equate=False), ] _Ncbiblast2SeqCommandline.__init__(self, cmd, **kwargs) @@ -1076,13 +1262,14 @@ class NcbirpsblastCommandline(_NcbiblastCommandline): subprocess module, as described in the Biopython tutorial. """ def __init__(self, cmd="rpsblast", **kwargs): - self.parameters = [ \ + self.parameters = [ #Query filtering options: - _Option(["-seg", "seg"], [], None, 0, + _Option(["-seg", "seg"], """Filter query sequence with SEG (string). Format: "yes", "window locut hicut", or "no" to disable. - Default is "12 2.2 2.5""", False), + Default is "12 2.2 2.5""", + equate=False), ] _NcbiblastCommandline.__init__(self, cmd, **kwargs) @@ -1105,24 +1292,29 @@ class NcbirpstblastnCommandline(_NcbiblastCommandline): subprocess module, as described in the Biopython tutorial. """ def __init__(self, cmd="rpstblastn", **kwargs): - self.parameters = [ \ + self.parameters = [ #Input query options: - _Option(["-strand", "strand"], [], - lambda value : value in ["both", "minus", "plus"],0, + _Option(["-strand", "strand"], """Query strand(s) to search against database/subject. 
- Values allowed are "both" (default), "minus", "plus".""", False), + Values allowed are "both" (default), "minus", "plus".""", + checker_function=lambda value : value in ["both", + "minus", + "plus"], + equate=False), #Input query options: - _Option(["-query_gencode", "query_gencode"], [], None, 0, + _Option(["-query_gencode", "query_gencode"], """Genetic code to use to translate query - Integer. Default is one.""", False), + Integer. Default is one.""", + equate=False), #Query filtering options: - _Option(["-seg", "seg"], [], None, 0, + _Option(["-seg", "seg"], """Filter query sequence with SEG (string). Format: "yes", "window locut hicut", or "no" to disable. - Default is "12 2.2 2.5""", False), + Default is "12 2.2 2.5""", + equate=False), #Extension options: _Switch(["-ungapped", "ungapped"], "Perform ungapped alignment only?"), @@ -1158,15 +1350,20 @@ class NcbiblastformatterCommandline(_NcbibaseblastCommandline): wrapper. """ def __init__(self, cmd="blast_formatter", **kwargs): - self.parameters = [ \ + self.parameters = [ # Input options - _Option(["-rid", "rid"], [], None, False, - "BLAST Request ID (RID), not compatiable with archive arg", False), - _Option(["-archive", "archive"], ["file"], None, False, - "Archive file of results, not compatiable with rid arg.", False), + _Option(["-rid", "rid"], + "BLAST Request ID (RID), not compatiable with archive arg", + equate=False), + _Option(["-archive", "archive"], + "Archive file of results, not compatiable with rid arg.", + types=["file"], + equate=False), # Restrict search or results - _Option(["-max_target_seqs", "max_target_seqs"], [], lambda value: value >= 1, False, - """Maximum number of aligned sequences to keep""", True), + _Option(["-max_target_seqs", "max_target_seqs"], + "Maximum number of aligned sequences to keep", + checker_function=lambda value: value >= 1, + equate=False), ] _NcbibaseblastCommandline.__init__(self, cmd, **kwargs) diff --git a/Bio/Emboss/Applications.py 
b/Bio/Emboss/Applications.py index 1d178920fc8..3b29f073cb5 100644 --- a/Bio/Emboss/Applications.py +++ b/Bio/Emboss/Applications.py @@ -92,8 +92,9 @@ class _EmbossCommandLine(_EmbossMinimalCommandLine): def __init__(self, cmd=None, **kwargs): assert cmd is not None extra_parameters = [ - _Option(["-outfile","outfile"], ["file"], None, 0, - "Output filename"), + _Option(["-outfile","outfile"], + "Output filename", + types=["file"]), ] try: #Insert extra parameters - at the start just in case there @@ -147,294 +148,294 @@ class Primer3Commandline(_EmbossCommandLine): """ def __init__(self, cmd="eprimer3", **kwargs): self.parameters = [ - _Option(["-sequence","sequence"], [], None, 1, - "Sequence to choose primers from."), - _Option(["-task","task"], [], None, 0, + _Option(["-sequence","sequence"], + "Sequence to choose primers from.", + is_required=True), + _Option(["-task","task"], "Tell eprimer3 what task to perform."), - _Option(["-hybridprobe","hybridprobe"], [], None, 0, + _Option(["-hybridprobe","hybridprobe"], "Find an internal oligo to use as a hyb probe."), - _Option(["-numreturn","numreturn"], [], None, 0, + _Option(["-numreturn","numreturn"], "Maximum number of primer pairs to return."), - _Option(["-includedregion","includedregion"], [], None, 0, + _Option(["-includedregion","includedregion"], "Subregion of the sequence in which to pick primers."), - _Option(["-target","target"], [], None, 0, + _Option(["-target","target"], "Sequence to target for flanking primers."), - _Option(["-excludedregion","excludedregion"], [], None, 0, + _Option(["-excludedregion","excludedregion"], "Regions to exclude from primer picking."), - _Option(["-forwardinput","forwardinput"], [], None, 0, + _Option(["-forwardinput","forwardinput"], "Sequence of a forward primer to check."), - _Option(["-reverseinput","reverseinput"], [], None, 0, + _Option(["-reverseinput","reverseinput"], "Sequence of a reverse primer to check."), - _Option(["-gcclamp","gcclamp"], [], None, 0, + 
_Option(["-gcclamp","gcclamp"], "The required number of Gs and Cs at the 3' of each primer."), - _Option(["-osize","osize"], [], None, 0, + _Option(["-osize","osize"], "Optimum length of a primer oligo."), - _Option(["-minsize","minsize"], [], None, 0, + _Option(["-minsize","minsize"], "Minimum length of a primer oligo."), - _Option(["-maxsize","maxsize"], [], None, 0, + _Option(["-maxsize","maxsize"], "Maximum length of a primer oligo."), - _Option(["-otm","otm"], [], None, 0, + _Option(["-otm","otm"], "Optimum melting temperature for a primer oligo."), - _Option(["-mintm","mintm"], [], None, 0, + _Option(["-mintm","mintm"], "Minimum melting temperature for a primer oligo."), - _Option(["-maxtm","maxtm"], [], None, 0, + _Option(["-maxtm","maxtm"], "Maximum melting temperature for a primer oligo."), - _Option(["-maxdifftm","maxdifftm"], [], None, 0, + _Option(["-maxdifftm","maxdifftm"], "Maximum difference in melting temperatures between " "forward and reverse primers."), - _Option(["-ogcpercent","ogcpercent"], [], None, 0, + _Option(["-ogcpercent","ogcpercent"], "Optimum GC% for a primer."), - _Option(["-mingc","mingc"], [], None, 0, + _Option(["-mingc","mingc"], "Minimum GC% for a primer."), - _Option(["-maxgc","maxgc"], [], None, 0, + _Option(["-maxgc","maxgc"], "Maximum GC% for a primer."), - _Option(["-saltconc","saltconc"], [], None, 0, + _Option(["-saltconc","saltconc"], "Millimolar salt concentration in the PCR."), - _Option(["-dnaconc","dnaconc"], [], None, 0, + _Option(["-dnaconc","dnaconc"], "Nanomolar concentration of annealing oligos in the PCR."), - _Option(["-maxployx","maxployx"], [], None, 0, + _Option(["-maxployx","maxployx"], "Maximum allowable mononucleotide repeat length in a primer."), #Primer length: - _Option(["-productosize","productosize"], [], None, 0, + _Option(["-productosize","productosize"], """Optimum size for the PCR product (OBSOLETE). 
Option replaced in EMBOSS 6.1.0 by -psizeopt """), - _Option(["-psizeopt", "psizeopt"], [], None, 0, + _Option(["-psizeopt", "psizeopt"], """Optimum size for the PCR product. Option added in EMBOSS 6.1.0, replacing -productosize """), - _Option(["-productsizerange","productsizerange"], [], None, 0, + _Option(["-productsizerange","productsizerange"], """Acceptable range of length for the PCR product (OBSOLETE). Option replaced in EMBOSS 6.1.0 by -prange """), - _Option(["-prange", "prange"], [], None, 0, + _Option(["-prange", "prange"], """Acceptable range of length for the PCR product. Option added in EMBOSS 6.1.0, replacing -productsizerange """), #Primer temperature: - _Option(["-productotm","productotm"], [], None, 0, + _Option(["-productotm","productotm"], """Optimum melting temperature for the PCR product (OBSOLETE). Option replaced in EMBOSS 6.1.0 by -ptmopt """), - _Option(["-ptmopt", "ptmopt"], [], None, 0, + _Option(["-ptmopt", "ptmopt"], """Optimum melting temperature for the PCR product. Option added in EMBOSS 6.1.0, replacing -productotm """), - _Option(["-productmintm","productmintm"], [], None, 0, + _Option(["-productmintm","productmintm"], """Minimum allowed melting temperature for the amplicon (OBSOLETE) Option replaced in EMBOSS 6.1.0 by -ptmmin """), - _Option(["-ptmmin", "ptmmin"], [], None, 0, + _Option(["-ptmmin", "ptmmin"], """Minimum allowed melting temperature for the amplicon."), Option added in EMBOSS 6.1.0, replacing -productmintm """), - _Option(["-productmaxtm","productmaxtm"], [], None, 0, + _Option(["-productmaxtm","productmaxtm"], """Maximum allowed melting temperature for the amplicon (OBSOLETE). 
Option replaced in EMBOSS 6.1.0 by -ptmmax """), - _Option(["-ptmmax", "ptmmax"], [], None, 0, + _Option(["-ptmmax", "ptmmax"], """Maximum allowed melting temperature for the amplicon."), Option added in EMBOSS 6.1.0, replacing -productmaxtm """), #Note to self, should be -oexcludedregion not -oexcluderegion - _Option(["-oexcludedregion", "oexcludedregion"], [], None, 0, + _Option(["-oexcludedregion", "oexcludedregion"], """Do not pick internal oligos in this region."), Option added in EMBOSS 6.1.0, replacing -oligoexcludedregion. """), - _Option(["-oligoexcludedregion", "oligoexcludedregion"], [], - None, 0, + _Option(["-oligoexcludedregion", "oligoexcludedregion"], """Do not pick internal oligos in this region (OBSOLETE)."), Option replaced in EMBOSS 6.1.0 by -oexcluderegion. """), - _Option(["-oligoinput","oligoinput"], [], None, 0, + _Option(["-oligoinput","oligoinput"], "Sequence of the internal oligo."), #Oligo length: - _Option(["-oligosize","oligosize"], [], None, 0, + _Option(["-oligosize","oligosize"], """Optimum length of internal oligo (OBSOLETE). Option replaced in EMBOSS 6.1.0 by -osizeopt. """), - _Option(["-osizeopt", "osizeopt"], [], None, 0, + _Option(["-osizeopt", "osizeopt"], """Optimum length of internal oligo. Option added in EMBOSS 6.1.0, replaces -oligosize """), - _Option(["-oligominsize","oligominsize"], [], None, 0, + _Option(["-oligominsize","oligominsize"], """Minimum length of internal oligo (OBSOLETE)."), Option replaced in EMBOSS 6.1.0 by -ominsize. """), - _Option(["-ominsize", "ominsize"], [], None, 0, + _Option(["-ominsize", "ominsize"], """Minimum length of internal oligo." Option added in EMBOSS 6.1.0, replaces -oligominsize """), - _Option(["-oligomaxsize","oligomaxsize"], [], None, 0, + _Option(["-oligomaxsize","oligomaxsize"], """Maximum length of internal oligo (OBSOLETE). Option replaced in EMBOSS 6.1.0 by -omaxsize. 
"""), - _Option(["-omaxsize", "omaxsize"], [], None, 0, + _Option(["-omaxsize", "omaxsize"], """Maximum length of internal oligo. Option added in EMBOSS 6.1.0, replaces -oligomaxsize """), #Oligo GC temperature: - _Option(["-oligotm","oligotm"], [], None, 0, + _Option(["-oligotm","oligotm"], """Optimum melting temperature of internal oligo (OBSOLETE). Option replaced in EMBOSS 6.1.0 by -otmopt. """), - _Option(["-otmopt", "otmopt"], [], None, 0, + _Option(["-otmopt", "otmopt"], """Optimum melting temperature of internal oligo. Option added in EMBOSS 6.1.0. """), - _Option(["-oligomintm","oligomintm"], [], None, 0, + _Option(["-oligomintm","oligomintm"], """Minimum melting temperature of internal oligo (OBSOLETE). Option replaced in EMBOSS 6.1.0 by -otmmin. """), - _Option(["-otmmin", "otmmin"], [], None, 0, + _Option(["-otmmin", "otmmin"], """Minimum melting temperature of internal oligo. Option added in EMBOSS 6.1.0, replacing -oligomintm """), - _Option(["-oligomaxtm","oligomaxtm"], [], None, 0, + _Option(["-oligomaxtm","oligomaxtm"], """Maximum melting temperature of internal oligo (OBSOLETE). Option replaced in EMBOSS 6.1.0 by -otmmax. """), - _Option(["-otmmax", "otmmax"], [], None, 0, + _Option(["-otmmax", "otmmax"], """Maximum melting temperature of internal oligo. Option added in EMBOSS 6.1.0, replacing -oligomaxtm """), #Oligo GC percent: - _Option(["-oligoogcpercent","oligoogcpercent"], [], None, 0, + _Option(["-oligoogcpercent","oligoogcpercent"], """Optimum GC% for internal oligo (OBSOLETE). Option replaced in EMBOSS 6.1.0 by -ogcopt. """), - _Option(["-ogcopt", "ogcopt"], [], None, 0, + _Option(["-ogcopt", "ogcopt"], """Optimum GC% for internal oligo." Option added in EMBOSS 6.1.0, replacing -oligoogcpercent """), - _Option(["-oligomingc","oligomingc"], [], None, 0, + _Option(["-oligomingc","oligomingc"], """Minimum GC% for internal oligo (OBSOLETE). Option replaced in EMBOSS 6.1.0 by -ogcmin. 
"""), - _Option(["-ogcmin", "ogcmin"], [], None, 0, + _Option(["-ogcmin", "ogcmin"], """Minimum GC% for internal oligo. Option added in EMBOSS 6.1.0, replacing -oligomingc """), - _Option(["-oligomaxgc","oligomaxgc"], [], None, 0, + _Option(["-oligomaxgc","oligomaxgc"], """Maximum GC% for internal oligo. Option replaced in EMBOSS 6.1.0 by -ogcmax """), - _Option(["-ogcmax", "ogcmax"], [], None, 0, + _Option(["-ogcmax", "ogcmax"], """Maximum GC% for internal oligo."), Option added in EMBOSS 6.1.0, replacing -oligomaxgc """), #Oligo salt concentration: - _Option(["-oligosaltconc","oligosaltconc"], [], None, 0, + _Option(["-oligosaltconc","oligosaltconc"], """Millimolar concentration of salt in the hybridisation."), Option replaced in EMBOSS 6.1.0 by -osaltconc """), - _Option(["-osaltconc", "osaltconc"], [], None, 0, + _Option(["-osaltconc", "osaltconc"], """Millimolar concentration of salt in the hybridisation."), Option added in EMBOSS 6.1.0, replacing -oligosaltconc """), - _Option(["-oligodnaconc","oligodnaconc"], [], None, 0, + _Option(["-oligodnaconc","oligodnaconc"], """Nanomolar concentration of internal oligo in the hybridisation. Option replaced in EMBOSS 6.1.0 by -odnaconc """), - _Option(["-odnaconc", "odnaconc"], [], None, 0, + _Option(["-odnaconc", "odnaconc"], """Nanomolar concentration of internal oligo in the hybridisation. Option added in EMBOSS 6.1.0, replacing -oligodnaconc """), #Oligo self complementarity - _Option(["-oligoselfany","oligoselfany"], [], None, 0, + _Option(["-oligoselfany","oligoselfany"], """Maximum allowable alignment score for self-complementarity (OBSOLETE). 
Option replaced in EMBOSS 6.1.0 by -oanyself """), - _Option(["-oanyself", "oanyself"], [], None, 0, + _Option(["-oanyself", "oanyself"], """Maximum allowable alignment score for self-complementarity."), Option added in EMBOSS 6.1.0, replacing -oligoselfany """), - _Option(["-oligoselfend","oligoselfend"], [], None, 0, - """Maximum allowable 3`-anchored global alignment score + _Option(["-oligoselfend","oligoselfend"], + """Maximum allowable 3`-anchored global alignment score " for self-complementarity (OBSOLETE). Option replaced in EMBOSS 6.1.0 by -oendself """), - _Option(["-oendself", "oendself"], [], None, 0, + _Option(["-oendself", "oendself"], """Max 3`-anchored self-complementarity global alignment score. Option added in EMBOSS 6.1.0, replacing -oligoselfend """), - _Option(["-oligomaxpolyx","oligomaxpolyx"], [], None, 0, + _Option(["-oligomaxpolyx","oligomaxpolyx"], """Maximum length of mononucleotide repeat in internal oligo (OBSOLETE). Option replaced in EMBOSS 6.1.0 by -opolyxmax """), - _Option(["-opolyxmax", "opolyxmax"], [], None, 0, + _Option(["-opolyxmax", "opolyxmax"], """Maximum length of mononucleotide repeat in internal oligo."), Option added in EMBOSS 6.1.0, replacing -oligomaxpolyx """), - _Option(["-mispriminglibraryfile","mispriminglibraryfile"], [], None, 0, + _Option(["-mispriminglibraryfile","mispriminglibraryfile"], "File containing library of sequences to avoid amplifying"), - _Option(["-maxmispriming","maxmispriming"], [], None, 0, + _Option(["-maxmispriming","maxmispriming"], "Maximum allowed similarity of primers to sequences in " "library specified by -mispriminglibrary"), - _Option(["-oligomaxmishyb","oligomaxmishyb"], [], None, 0, + _Option(["-oligomaxmishyb","oligomaxmishyb"], """Maximum alignment score for hybridisation of internal oligo to library specified by -oligomishyblibraryfile (OBSOLETE). 
Option replaced in EMBOSS 6.1.0 by -omishybmax """), - _Option(["-omishybmax", "omishybmax"], [], None, 0, + _Option(["-omishybmax", "omishybmax"], """Maximum alignment score for hybridisation of internal oligo to library specified by -mishyblibraryfile. Option added in EMBOSS 6.1.0, replacing -oligomaxmishyb """), _Option(["-oligomishyblibraryfile", "oligomishyblibraryfile"], - [], None, 0, + """Library file of seqs to avoid internal oligo hybridisation (OBSOLETE). Option replaced in EMBOSS 6.1.0 by -mishyblibraryfile """), - _Option(["-mishyblibraryfile", "mishyblibraryfile"], [], None, 0, + _Option(["-mishyblibraryfile", "mishyblibraryfile"], """Library file of seqs to avoid internal oligo hybridisation. Option added in EMBOSS 6.1.0, replacing -oligomishyblibraryfile """), - _Option(["-explainflag","explainflag"], [], None, 0, + _Option(["-explainflag","explainflag"], "Produce output tags with eprimer3 statistics"), ] _EmbossCommandLine.__init__(self, cmd, **kwargs) @@ -445,21 +446,25 @@ class PrimerSearchCommandline(_EmbossCommandLine): """ def __init__(self, cmd="primersearch", **kwargs): self.parameters = [ - _Option(["-seqall","-sequences","sequences","seqall"], [], - None, 1, "Sequence to look for the primer pairs in."), + _Option(["-seqall","-sequences","sequences","seqall"], + "Sequence to look for the primer pairs in.", + is_required=True), #When this wrapper was written primersearch used -sequences #as the argument name. Since at least EMBOSS 5.0 (and #perhaps earlier) this has been -seqall instead. - _Option(["-infile","-primers","primers","infile"], ["file"], - None, 1, "File containing the primer pairs to search for."), + _Option(["-infile","-primers","primers","infile"], + "File containing the primer pairs to search for.", + types=["file"], + is_required=True), #When this wrapper was written primersearch used -primers #as the argument name. Since at least EMBOSS 5.0 (and #perhaps earlier) this has been -infile instead. 
- _Option(["-mismatchpercent","mismatchpercent"], [], None, 1, - "Allowed percentage mismatch (any integer value, default 0)."), - _Option(["-snucleotide","snucleotide"], [], None, 0, + _Option(["-mismatchpercent","mismatchpercent"], + "Allowed percentage mismatch (any integer value, default 0).", + is_required=True), + _Option(["-snucleotide","snucleotide"], "Sequences are nucleotide (boolean)"), - _Option(["-sprotein","sprotein"], [], None, 0, + _Option(["-sprotein","sprotein"], "Sequences are protein (boolean)"), ] _EmbossCommandLine.__init__(self, cmd, **kwargs) @@ -477,31 +482,33 @@ def __init__(self, cmd="eprotdist", **kwargs): import Bio warnings.warn("Bio.Emboss.Application.EProtDistCommandline has been deprecated; please use 'fprotdist' instead (see FProtDistCommandline).", Bio.BiopythonDeprecationWarning) self.parameters = [ - _Option(["-msf","msf"], [], None, 1, - "File containing sequences"), - _Option(["-method","method"], [], None, 1, - "Choose the method to use"), - _Option(["-categ","categ"], [], None, 0, + _Option(["-msf","msf"], + "File containing sequences", + is_required=True), + _Option(["-method","method"], + "Choose the method to use", + is_required=True), + _Option(["-categ","categ"], "Choose the category to use"), - _Option(["-gencode","gencode"], [], None, 0, + _Option(["-gencode","gencode"], "Which genetic code"), - _Option(["-prob","prob"], [], None, 0, + _Option(["-prob","prob"], "Prob change category (1.0=easy)"), - _Option(["-tranrate","tranrate"], [], None, 0, + _Option(["-tranrate","tranrate"], "Transition/transversion ratio"), - _Option(["-freqa","freqa"], [], None, 0, + _Option(["-freqa","freqa"], "Frequency for A"), - _Option(["-freqc","freqc"], [], None, 0, + _Option(["-freqc","freqc"], "Frequency for C"), - _Option(["-freqg","freqg"], [], None, 0, + _Option(["-freqg","freqg"], "Frequency for G"), - _Option(["-freqt","freqt"], [], None, 0, + _Option(["-freqt","freqt"], "Frequency for T"), - _Option(["-printdata","printdata"], 
[], None, 0, + _Option(["-printdata","printdata"], "Print out the data at start of run"), - _Option(["-progress","progress"], [], None, 0, + _Option(["-progress","progress"], "Print indications of progress of run"), - _Option(["-basefrequency","basefrequency"], [], None, 0, + _Option(["-basefrequency","basefrequency"], "Use empirical base frequencies")] _EmbossCommandLine.__init__(self, cmd, **kwargs) @@ -518,37 +525,42 @@ def __init__(self, cmd="eneighbor", **kwargs): import Bio warnings.warn("Bio.Emboss.Application.ENeighborCommandline has been deprecated; please use 'fneighbor' instead (see FNeighborCommandline).", Bio.BiopythonDeprecationWarning) self.parameters = [ - _Option(["-infile","infile"], [], None, 1, - "infile value"), - _Option(["-trout","trout"], [], None, 1, - "Create a tree file"), - _Option(["-treefile","treefile"], [], None, 1, - "Tree file name"), - _Option(["-nj","nj"], [], None, 1, - "Neighbor-joining"), - _Option(["-noog","noog"], [], None, 1, - "Outgroup root"), - _Option(["-outgnum","outgnum"], [], None, 0, + _Option(["-infile","infile"], + "infile value", + is_required=True), + _Option(["-trout","trout"], + "Create a tree file", + is_required=True), + _Option(["-treefile","treefile"], + "Tree file name", + is_required=True), + _Option(["-nj","nj"], + "Neighbor-joining", + is_required=True), #TODO - Check this + _Option(["-noog","noog"], + "Outgroup root", + is_required=True), #TODO - Check this + _Option(["-outgnum","outgnum"], "number of the outgroup"), - _Option(["-randseed","randseed"], [], None, 0, + _Option(["-randseed","randseed"], "Random number seed (must be odd)"), - _Option(["-datasets","datasets"], [], None, 0, + _Option(["-datasets","datasets"], "How many data sets"), - _Option(["-drawtree","drawtree"], [], None, 0, + _Option(["-drawtree","drawtree"], "Draw tree"), - _Option(["-lt","lt"], [], None, 0, + _Option(["-lt","lt"], "Lower-triangular data matrix"), - _Option(["-ut","ut"], [], None, 0, + _Option(["-ut","ut"], 
"Upper-triangular data matrix"), - _Option(["-sr","sr"], [], None, 0, + _Option(["-sr","sr"], "Subreplicates"), - _Option(["-random","random"], [], None, 0, + _Option(["-random","random"], "Randomize input order of species"), - _Option(["-multsets","multsets"], [], None, 0, + _Option(["-multsets","multsets"], "Analyze multiple data sets"), - _Option(["-printdata","printdata"], [], None, 0, + _Option(["-printdata","printdata"], "Print out the data at start of run"), - _Option(["-progress","progress"], [], None, 0, + _Option(["-progress","progress"], "Print indications of progress of run")] _EmbossCommandLine.__init__(self, cmd, **kwargs) @@ -565,44 +577,47 @@ def __init__(self, cmd="eprotpars", **kwargs): import Bio warnings.warn("Bio.Emboss.Application.EProtParsCommandline has been deprecated; please use 'fprotpars' instead (see FProtParsCommandline).", Bio.BiopythonDeprecationWarning) self.parameters = [ - _Option(["-msf","msf"], ["file"], None, 1, - "Sequences file to be read in"), - _Option(["-besttree","besttree"], [], None, 0, + _Option(["-msf","msf"], + "Sequences file to be read in", + types=["file"], + is_required=True), + _Option(["-besttree","besttree"], "Search for the best tree"), - _Option(["-random","random"], [], None, 0, + _Option(["-random","random"], "Randomize input order of species"), - _Option(["-norandom","norandom"], [], None, 0, + _Option(["-norandom","norandom"], "Do not randomize input order of species"), - _Option(["-randseed","randseed"], [], None, 0, + _Option(["-randseed","randseed"], "Random number seed (must be odd)"), - _Option(["-randtimes","randtimes"], [], None, 0, + _Option(["-randtimes","randtimes"], "How many times to randomize"), - _Option(["-og","og"], [], None, 0, + _Option(["-og","og"], "Use an outgroup root"), - _Option(["-noog","noog"], [], None, 0, + _Option(["-noog","noog"], "Do not use an outgroup root"), - _Option(["-outgnum","outgnum"], [], None, 0, + _Option(["-outgnum","outgnum"], "Number of the outgroup"), - 
_Option(["-thresh","thresh"], [], None, 0, + _Option(["-thresh","thresh"], "Use Threshold parsimony"), - _Option(["-valthresh","valthresh"], [], None, 0, + _Option(["-valthresh","valthresh"], "threshold value"), - _Option(["-printdata","printdata"], [], None, 0, + _Option(["-printdata","printdata"], "Print out the data at start of run"), - _Option(["-progress","progress"], [], None, 0, + _Option(["-progress","progress"], "Print indications of progress of run"), - _Option(["-steps","steps"], [], None, 0, + _Option(["-steps","steps"], "Print out steps in each site"), - _Option(["-seqatnodes","seqatnodes"], [], None, 0, + _Option(["-seqatnodes","seqatnodes"], "Print sequences at all nodes of tree"), - _Option(["-drawtree","drawtree"], [], None, 0, + _Option(["-drawtree","drawtree"], "Draw tree"), - _Option(["-trout","trout"], [], None, 0, + _Option(["-trout","trout"], "Create a tree file"), - _Option(["-notrout","notrout"], [], None, 0, + _Option(["-notrout","notrout"], "Do not create a tree file"), - _Option(["-treefile","treefile"], ["file"], None, 0, - "Output treefile name")] + _Option(["-treefile","treefile"], + "Output treefile name", + types=["file"])] _EmbossCommandLine.__init__(self, cmd, **kwargs) @@ -618,31 +633,34 @@ def __init__(self, cmd="econsense", **kwargs): import Bio warnings.warn("Bio.Emboss.Application.EConsenseCommandline has been deprecated; please use 'fconsense' instead (see FConsenseCommandline).", Bio.BiopythonDeprecationWarning) self.parameters = [ - _Option(["-infile","infile"], ["file"], None, 1, - "file to read in (New Hampshire standard form)"), - _Option(["-notrout","notrout"], [], None, 0, + _Option(["-infile","infile"], + "file to read in (New Hampshire standard form)", + types=["file"], + is_required=True), + _Option(["-notrout","notrout"], "Do not create a tree file"), - _Option(["-trout","trout"], [], None, 0, + _Option(["-trout","trout"], "Create a tree file"), - _Option(["-treefile","treefile"], ["file"], None, 0, - "tree file 
name"), - _Option(["-noog","noog"], [], None, 0, + _Option(["-treefile","treefile"], + "tree file name", + types=["file"]), + _Option(["-noog","noog"], "Do not use an outgroup"), - _Option(["-og","og"], [], None, 0, + _Option(["-og","og"], "Use an outgroup"), - _Option(["-outgnum","outgnum"], [], None, 0, + _Option(["-outgnum","outgnum"], "number of the outgroup"), - _Option(["-nodrawtree","nodrawtree"], [], None, 0, + _Option(["-nodrawtree","nodrawtree"], "Do not draw a tree"), - _Option(["-drawtree","drawtree"], [], None, 0, + _Option(["-drawtree","drawtree"], "Draw tree"), - _Option(["-root","root"], [], None, 0, + _Option(["-root","root"], "Trees to be treated as Rooted"), - _Option(["-progress","progress"], [], None, 0, + _Option(["-progress","progress"], "Print indications of the progress of run"), - _Option(["-noprintsets","noprintsets"], [], None, 0, + _Option(["-noprintsets","noprintsets"], "Do not print out the sets of species"), - _Option(["-printsets","printsets"], [], None, 0, + _Option(["-printsets","printsets"], "Print out the sets of species")] _EmbossCommandLine.__init__(self, cmd, **kwargs) @@ -659,25 +677,31 @@ def __init__(self, cmd="eseqboot", **kwargs): import Bio warnings.warn("Bio.Emboss.Application.ESeqBootCommandline has been deprecated; please use 'fseqboot' instead (see FSeqBootCommandline).", Bio.BiopythonDeprecationWarning) self.parameters = [ - _Option(["-datafile","datafile"], ["file"], None, 1, - "Input file"), - _Option(["-randseed","randseed"], [], None, 1, - "Random number seed (must be odd)"), - _Option(["-method","method"], [], None, 1, - "Choose the method"), - _Option(["-test","test"], [], None, 1, - "Choose test"), - _Option(["-reps","reps"], [], None, 1, - "How many replicates"), - _Option(["-inter","inter"], [], None, 0, + _Option(["-datafile","datafile"], + "Input file", + types=["file"], + is_required=True), + _Option(["-randseed","randseed"], + "Random number seed (must be odd)", + is_required=True), + 
_Option(["-method","method"], + "Choose the method", + is_required=True), #TODO - Options? + _Option(["-test","test"], + "Choose test", #TODO - options? + is_required=True), + _Option(["-reps","reps"], + "How many replicates", + is_required=True), + _Option(["-inter","inter"], "Interleaved input"), - _Option(["-enzymes","enzymes"], [], None, 0, + _Option(["-enzymes","enzymes"], "Present in input file"), - _Option(["-all","all"], [], None, 0, + _Option(["-all","all"], "All alleles present at each locus"), - _Option(["-printdata","printdata"], [], None, 0, + _Option(["-printdata","printdata"], "Print out the data at start of run"), - _Option(["-progress","progress"], [], None, 0, + _Option(["-progress","progress"], "Print indications of progress of run")] _EmbossCommandLine.__init__(self, cmd, **kwargs) @@ -690,31 +714,34 @@ class FDNADistCommandline(_EmbossCommandLine): """ def __init__(self, cmd = "fdnadist", **kwargs): self.parameters = [ - _Option(["-sequence", "sequence"], [], None, 1, - "seq file to use (phylip)"), - _Option(["-method", "method"], [], None, 1, - "sub. model [f,k,j,l,s]"), - _Option(["-gamma", "gamma"], [], None, 0, + _Option(["-sequence", "sequence"], + "seq file to use (phylip)", + types=["file"], + is_required=True), + _Option(["-method", "method"], + "sub. 
model [f,k,j,l,s]", + is_required=True), + _Option(["-gamma", "gamma"], "gamma [g, i,n]"), - _Option(["-ncategories", "ncategories"], [], None, 0, + _Option(["-ncategories", "ncategories"], "number of rate catergories (1-9)"), - _Option(["-rate", "rate"], [], None, 0, + _Option(["-rate", "rate"], "rate for each category"), - _Option(["-categories","categories"], [], None, 0, + _Option(["-categories","categories"], "File of substitution rate categories"), - _Option(["-weights", "weights"], [], None, 0, + _Option(["-weights", "weights"], "weights file"), - _Option(["-gammacoefficient", "gammacoefficient"], [], None, 0, + _Option(["-gammacoefficient", "gammacoefficient"], "value for gamma (> 0.001)"), - _Option(["-invarfrac", "invarfrac"], [], None, 0, + _Option(["-invarfrac", "invarfrac"], "proportoin of invariant sites"), - _Option(["-ttratio", "ttratio"], [], None, 0, + _Option(["-ttratio", "ttratio"], "ts/tv ratio"), - _Option(["-freqsfrom", "freqsfrom"], [], None, 0, + _Option(["-freqsfrom", "freqsfrom"], "use emprical base freqs"), - _Option(["-basefreq", "basefreq"], [], None, 0, + _Option(["-basefreq", "basefreq"], "specify basefreqs"), - _Option(["-lower", "lower"], [], None, 0, + _Option(["-lower", "lower"], "lower triangle matrix (y/N)")] _EmbossCommandLine.__init__(self, cmd, **kwargs) @@ -727,17 +754,19 @@ class FTreeDistCommandline(_EmbossCommandLine): """ def __init__(self, cmd = "ftreedist", **kwargs): self.parameters = [ - _Option(["-intreefile", "intreefile"], [], None, 1, - "tree file to score (phylip)"), - _Option(["-dtype", "dtype"], [], None, 0, + _Option(["-intreefile", "intreefile"], + "tree file to score (phylip)", + types=["file"], + is_required=True), + _Option(["-dtype", "dtype"], "distance type ([S]ymetric, [b]ranch score)"), - _Option(["-pairing", "pairing"], [], None, 0, + _Option(["-pairing", "pairing"], "tree pairing method ([A]djacent pairs, all [p]ossible pairs)"), - _Option(["-style", "style"], [], None, 0, + _Option(["-style", 
"style"], "output style - [V]erbose, [f]ill, [s]parse"), - _Option(["-noroot", "noroot"], [], None, 0, + _Option(["-noroot", "noroot"], "treat trees as rooted [N/y]"), - _Option(["-outgrno", "outgrno"], [], None, 0, + _Option(["-outgrno", "outgrno"], "which taxon to root the trees with (starts from 0)")] _EmbossCommandLine.__init__(self, cmd, **kwargs) @@ -750,25 +779,27 @@ class FNeighborCommandline(_EmbossCommandLine): """ def __init__(self, cmd = "fneighbor", **kwargs): self.parameters = [ - _Option(["-datafile", "datafile"], [], None, 1, - "dist file to use (phylip)"), - _Option(["-matrixtype", "matrixtype"], [], None, 0, + _Option(["-datafile", "datafile"], + "dist file to use (phylip)", + types=["file"], + is_required=True), + _Option(["-matrixtype", "matrixtype"], "is martrix [S]quare pr [u]pper or [l]ower"), - _Option(["-treetype", "treetype"], [], None, 0, + _Option(["-treetype", "treetype"], "nj or UPGMA tree (n/u)"), - _Option(["-outgrno","outgrno" ], [], None, 0, + _Option(["-outgrno","outgrno" ], "taxon to use as OG"), - _Option(["-jumble", "jumble"], [], None, 0, + _Option(["-jumble", "jumble"], "randommise input order (Y/n)"), - _Option(["-seed", "seed"], [], None, 0, + _Option(["-seed", "seed"], "provide a random seed"), - _Option(["-trout", "trout"], [], None, 0, + _Option(["-trout", "trout"], "write tree (Y/n)"), - _Option(["-outtreefile", "outtreefile"], [], None, 0, + _Option(["-outtreefile", "outtreefile"], "filename for output tree"), - _Option(["-progress", "progress"], [], None, 0, + _Option(["-progress", "progress"], "print progress (Y/n)"), - _Option(["-treeprint", "treeprint"], [], None, 0, + _Option(["-treeprint", "treeprint"], "print tree (Y/n)")] _EmbossCommandLine.__init__(self, cmd, **kwargs) @@ -781,31 +812,33 @@ class FSeqBootCommandline(_EmbossCommandLine): """ def __init__(self, cmd = "fseqboot", **kwargs): self.parameters = [ - _Option(["-sequence", "sequence"], [], None, 1, - "seq file to sample (phylip)"), - 
_Option(["-categories", "catergories"], [], None, 0, + _Option(["-sequence", "sequence"], + "seq file to sample (phylip)", + types=["file"], + is_required=True), + _Option(["-categories", "catergories"], "file of input categories"), - _Option(["-weights", "weights"], [], None, 0, + _Option(["-weights", "weights"], " weights file"), - _Option(["-test", "test"], [], None, 0, + _Option(["-test", "test"], "specify operation, default is bootstrap"), - _Option(["-regular", "regular"], [], None, 0, + _Option(["-regular", "regular"], "absolute number to resample"), - _Option(["-fracsample", "fracsample"], [], None, 0, + _Option(["-fracsample", "fracsample"], "fraction to resample"), - _Option(["-rewriteformat", "rewriteformat"], [], None, 0, + _Option(["-rewriteformat", "rewriteformat"], "output format ([P]hyilp, [n]exus, [x]ml"), - _Option(["-seqtype", "seqtype"], [], None, 0, + _Option(["-seqtype", "seqtype"], "output format ([D]na, [p]rotein, [r]na"), - _Option(["-blocksize", "blocksize"], [], None, 0, + _Option(["-blocksize", "blocksize"], "print progress (Y/n)"), - _Option(["-reps", "reps"], [], None, 0, + _Option(["-reps", "reps"], "how many replicates, defaults to 100)"), - _Option(["-justweights", "jusweights"], [], None, 0, + _Option(["-justweights", "jusweights"], "what to write out [D]atasets of just [w]eights"), - _Option(["-seed", "seed"], [], None, 0, + _Option(["-seed", "seed"], "specify random seed"), - _Option(["-dotdiff", "dotdiff"], [], None, 0, + _Option(["-dotdiff", "dotdiff"], "Use dot-differencing? 
[Y/n]"),] _EmbossCommandLine.__init__(self, cmd, **kwargs) @@ -821,35 +854,37 @@ class FDNAParsCommandline(_EmbossCommandLine): """ def __init__(self, cmd = "fdnapars", **kwargs): self.parameters = [ - _Option(["-sequence", "sequence"], [], None, 1, - "seq file to use (phylip)"), - _Option(["-intreefile", "intreefile"], [], None, 0, + _Option(["-sequence", "sequence"], + "seq file to use (phylip)", + types=["file"], + is_required=True), + _Option(["-intreefile", "intreefile"], "Phylip tree file"), - _Option(["-weights", "weights"], [], None, 0, + _Option(["-weights", "weights"], "weights file"), - _Option(["-maxtrees", "maxtrees"], [], None, 0, + _Option(["-maxtrees", "maxtrees"], "max trees to save during run"), - _Option(["-thorough", "thorough"], [], None, 0, + _Option(["-thorough", "thorough"], "more thorough search (Y/n)"), - _Option(["-rearrange", "rearrange"], [], None, 0, + _Option(["-rearrange", "rearrange"], "Rearrange on jsut 1 best tree (Y/n)"), - _Option(["-transversion", "transversion"], [], None, 0, + _Option(["-transversion", "transversion"], "Use tranversion parsimony (y/N)"), - _Option(["-njumble", "njumble"], [], None, 0, + _Option(["-njumble", "njumble"], "number of times to randomise input order (default is 0)"), - _Option(["-seed", "seed"], [], None, 0, + _Option(["-seed", "seed"], "provde random seed"), - _Option(["-outgrno", "outgrno"], [], None, 0, + _Option(["-outgrno", "outgrno"], "Specify outgroup"), - _Option(["-thresh", "thresh"], [], None, 0, + _Option(["-thresh", "thresh"], "Use threshold parsimony (y/N)"), - _Option(["-threshold", "threshold"], [], None, 0, + _Option(["-threshold", "threshold"], "Threshold value"), - _Option(["-trout", "trout"], [], None, 0, + _Option(["-trout", "trout"], "Write trees to file (Y/n)"), - _Option(["-outtreefile", "outtreefile"], [], None, 0, + _Option(["-outtreefile", "outtreefile"], "filename for output tree"), - _Option(["-dotdiff", "dotdiff"], [], None, 0, + _Option(["-dotdiff", "dotdiff"], "Use 
dot-differencing? [Y/n]")] _EmbossCommandLine.__init__(self, cmd, **kwargs) @@ -865,29 +900,33 @@ class FProtParsCommandline(_EmbossCommandLine): """ def __init__(self, cmd = "fprotpars", **kwargs): self.parameters = [ - _Option(["-sequence", "sequence"], [], None, 1, - "seq file to use (phylip)"), - _Option(["-intreefile", "intreefile"], [], None, 0, + _Option(["-sequence", "sequence"], + "seq file to use (phylip)", + types=["file"], + is_required=True), + _Option(["-intreefile", "intreefile"], "Phylip tree file to score"), - _Option(["-outtreefile", "outtreefile"], [], None, 1, - "phylip tree output file"), - _Option(["-weights", "weights"], [], None, 0, + _Option(["-outtreefile", "outtreefile"], + "phylip tree output file", + types=["file"], + is_required=True), + _Option(["-weights", "weights"], "weights file"), - _Option(["-whichcode", "whichcode"], [], None, 0, + _Option(["-whichcode", "whichcode"], "which genetic code, [U,M,V,F,Y]]"), - _Option(["-njumble", "njumble"], [], None, 0, + _Option(["-njumble", "njumble"], "number of times to randomise input order (default is 0)"), - _Option(["-seed", "seed"], [], None, 0, + _Option(["-seed", "seed"], "provde random seed"), - _Option(["-outgrno", "outgrno"], [], None, 0, + _Option(["-outgrno", "outgrno"], "Specify outgroup"), - _Option(["-thresh", "thresh"], [], None, 0, + _Option(["-thresh", "thresh"], "Use threshold parsimony (y/N)"), - _Option(["-threshold", "threshold"], [], None, 0, + _Option(["-threshold", "threshold"], "Threshold value"), - _Option(["-trout", "trout"], [], None, 0, + _Option(["-trout", "trout"], "Write trees to file (Y/n)"), - _Option(["-dotdiff", "dotdiff"], [], None, 0, + _Option(["-dotdiff", "dotdiff"], "Use dot-differencing? 
[Y/n]")] _EmbossCommandLine.__init__(self, cmd, **kwargs) @@ -900,33 +939,35 @@ class FProtDistCommandline(_EmbossCommandLine): """ def __init__(self, cmd = "fprotdist", **kwargs): self.parameters = [ - _Option(["-sequence", "sequence"], [], None, 1, - "seq file to use (phylip)"), - _Option(["-ncategories", "ncategories"], [], None, 0, + _Option(["-sequence", "sequence"], + "seq file to use (phylip)", + types=["file"], + is_required=True), + _Option(["-ncategories", "ncategories"], "number of rate catergories (1-9)"), - _Option(["-rate", "rate"], [], None, 0, + _Option(["-rate", "rate"], "rate for each category"), - _Option(["-catergories","catergories"], [], None, 0, + _Option(["-catergories","catergories"], "file of rates"), - _Option(["-weights", "weights"], [], None, 0, + _Option(["-weights", "weights"], "weights file"), - _Option(["-method", "method"], [], None, 0, + _Option(["-method", "method"], "sub. model [j,h,d,k,s,c]"), - _Option(["-gamma", "gamma"], [], None, 0, + _Option(["-gamma", "gamma"], "gamma [g, i,c]"), - _Option(["-gammacoefficient", "gammacoefficient"], [], None, 0, + _Option(["-gammacoefficient", "gammacoefficient"], "value for gamma (> 0.001)"), - _Option(["-invarcoefficient", "invarcoefficient"], [], None, 0, + _Option(["-invarcoefficient", "invarcoefficient"], "float for variation of substitution rate among sites"), - _Option(["-aacateg", "aacateg"], [], None, 0, + _Option(["-aacateg", "aacateg"], "Choose the category to use [G,C,H]"), - _Option(["-whichcode", "whichcode"], [], None, 0, + _Option(["-whichcode", "whichcode"], "genetic code [c,m,v,f,y]"), - _Option(["-ease", "ease"], [], None, 0, + _Option(["-ease", "ease"], "Pob change catergory (float between -0 and 1)"), - _Option(["-ttratio", "ttratio"], [], None, 0, + _Option(["-ttratio", "ttratio"], "Transition/transversion ratio (0-1)"), - _Option(["-basefreq", "basefreq"], [], None, 0, + _Option(["-basefreq", "basefreq"], "DNA base frequencies (space seperated list)")] 
_EmbossCommandLine.__init__(self, cmd, **kwargs) @@ -939,19 +980,21 @@ class FConsenseCommandline(_EmbossCommandLine): """ def __init__(self, cmd = "fconsense", **kwargs): self.parameters = [ - _Option(["-intreefile", "intreefile"], [], None, 1, - "file with phylip trees to make consensus from"), - _Option(["-method", "method"], [], None, 0, + _Option(["-intreefile", "intreefile"], + "file with phylip trees to make consensus from", + types=["file"], + is_required=True), + _Option(["-method", "method"], "consensus method [s, mr, MRE, ml]"), - _Option(["-mlfrac", "mlfrac"], [], None, 0, + _Option(["-mlfrac", "mlfrac"], "cut-off freq for a branch to appear in consensus (0.5-1.0)"), - _Option(["-root", "root"], [], None, 0, + _Option(["-root", "root"], "treat trees as rooted (YES, no)"), - _Option(["-outgrno", "outgrno"], [], None, 0, + _Option(["-outgrno", "outgrno"], "OTU to use as outgroup (starts from 0)"), - _Option(["-trout", "trout"], [], None, 0, + _Option(["-trout", "trout"], "treat trees as rooted (YES, no)"), - _Option(["-outtreefile", "outtreefile"], [], None, 0, + _Option(["-outtreefile", "outtreefile"], "Phylip tree output file (optional)")] _EmbossCommandLine.__init__(self, cmd, **kwargs) @@ -961,23 +1004,30 @@ class WaterCommandline(_EmbossCommandLine): """ def __init__(self, cmd="water", **kwargs): self.parameters = [ - _Option(["-asequence","asequence"], ["file"], None, 1, - "First sequence to align"), - _Option(["-bsequence","bsequence"], ["file"], None, 1, - "Second sequence to align"), - _Option(["-gapopen","gapopen"], [], None, 1, - "Gap open penalty"), - _Option(["-gapextend","gapextend"], [], None, 1, - "Gap extension penalty"), - _Option(["-datafile","datafile"], ["file"], None, 0, - "Matrix file"), - _Option(["-similarity","similarity"], [], None, 0, + _Option(["-asequence","asequence"], + "First sequence to align", + types=["file"], + is_required=True), + _Option(["-bsequence","bsequence"], + "Second sequence to align", + types=["file"], + 
is_required=True), + _Option(["-gapopen","gapopen"], + "Gap open penalty", + is_required=True), + _Option(["-gapextend","gapextend"], + "Gap extension penalty", + is_required=True), + _Option(["-datafile","datafile"], + "Matrix file", + types=["file"]), + _Option(["-similarity","similarity"], "Display percent identity and similarity"), - _Option(["-snucleotide","snucleotide"], [], None, 0, + _Option(["-snucleotide","snucleotide"], "Sequences are nucleotide (boolean)"), - _Option(["-sprotein","sprotein"], [], None, 0, + _Option(["-sprotein","sprotein"], "Sequences are protein (boolean)"), - _Option(["-aformat","aformat"], [], None, 0, + _Option(["-aformat","aformat"], "Display output in a different specified output format")] _EmbossCommandLine.__init__(self, cmd, **kwargs) @@ -987,23 +1037,30 @@ class NeedleCommandline(_EmbossCommandLine): """ def __init__(self, cmd="needle", **kwargs): self.parameters = [ - _Option(["-asequence","asequence"], ["file"], None, 1, - "First sequence to align"), - _Option(["-bsequence","bsequence"], ["file"], None, 1, - "Second sequence to align"), - _Option(["-gapopen","gapopen"], [], None, 1, - "Gap open penalty"), - _Option(["-gapextend","gapextend"], [], None, 1, - "Gap extension penalty"), - _Option(["-datafile","datafile"], ["file"], None, 0, - "Matrix file"), - _Option(["-similarity","similarity"], [], None, 0, + _Option(["-asequence","asequence"], + "First sequence to align", + types=["file"], + is_required=True), + _Option(["-bsequence","bsequence"], + "Second sequence to align", + types=["file"], + is_required=True), + _Option(["-gapopen","gapopen"], + "Gap open penalty", + is_required=True), + _Option(["-gapextend","gapextend"], + "Gap extension penalty", + is_required=True), + _Option(["-datafile","datafile"], + "Matrix file", + types=["file"]), + _Option(["-similarity","similarity"], "Display percent identity and similarity"), - _Option(["-snucleotide","snucleotide"], [], None, 0, + _Option(["-snucleotide","snucleotide"], 
"Sequences are nucleotide (boolean)"), - _Option(["-sprotein","sprotein"], [], None, 0, + _Option(["-sprotein","sprotein"], "Sequences are protein (boolean)"), - _Option(["-aformat","aformat"], [], None, 0, + _Option(["-aformat","aformat"], "Display output in a different specified output format")] _EmbossCommandLine.__init__(self, cmd, **kwargs) @@ -1013,15 +1070,18 @@ class FuzznucCommandline(_EmbossCommandLine): """ def __init__(self, cmd="fuzznuc", **kwargs): self.parameters = [ - _Option(["-sequence","sequence"], [], None, 1, - "Sequence database USA"), - _Option(["-pattern","pattern"], [], None, 1, - "Search pattern, using standard IUPAC one-letter codes"), - _Option(["-mismatch","mismatch"], [], None, 1, - "Number of mismatches"), - _Option(["-complement","complement"], [], None, 0, + _Option(["-sequence","sequence"], + "Sequence database USA", + is_required=True), + _Option(["-pattern","pattern"], + "Search pattern, using standard IUPAC one-letter codes", + is_required=True), + _Option(["-mismatch","mismatch"], + "Number of mismatches", + is_required=True), + _Option(["-complement","complement"], "Search complementary strand"), - _Option(["-rformat","rformat"], [], None, 0, + _Option(["-rformat","rformat"], "Specify the report format to output in.")] _EmbossCommandLine.__init__(self, cmd, **kwargs) @@ -1031,42 +1091,44 @@ class Est2GenomeCommandline(_EmbossCommandLine): """ def __init__(self, cmd="est2genome", **kwargs): self.parameters = [ - _Option(["-est","est"], [], None, 1, - "EST sequence(s)"), - _Option(["-genome","genome"], [], None, 1, - "Genomic sequence"), - _Option(["-match","match"], [], None, 0, + _Option(["-est","est"], + "EST sequence(s)", + is_required=True), + _Option(["-genome","genome"], + "Genomic sequence", + is_required=True), + _Option(["-match","match"], "Score for matching two bases"), - _Option(["-mismatch","mismatch"], [], None, 0, + _Option(["-mismatch","mismatch"], "Cost for mismatching two bases"), - 
_Option(["-gappenalty","gappenalty"], [], None, 0, + _Option(["-gappenalty","gappenalty"], "Cost for deleting a single base in either sequence, " "excluding introns"), - _Option(["-intronpenalty","intronpenalty"], [], None, 0, + _Option(["-intronpenalty","intronpenalty"], "Cost for an intron, independent of length."), - _Option(["-splicepenalty","splicepenalty"], [], None, 0, + _Option(["-splicepenalty","splicepenalty"], "Cost for an intron, independent of length " "and starting/ending on donor-acceptor sites"), - _Option(["-minscore","minscore"], [], None, 0, + _Option(["-minscore","minscore"], "Exclude alignments with scores below this threshold score."), - _Option(["-reverse","reverse"], [], None, 0, + _Option(["-reverse","reverse"], "Reverse the orientation of the EST sequence"), - _Option(["-splice","splice"], [], None, 0, + _Option(["-splice","splice"], "Use donor and acceptor splice sites."), - _Option(["-mode","mode"], [], None, 0, + _Option(["-mode","mode"], "This determines the comparion mode. 
'both', 'forward' " "'reverse'"), - _Option(["-best","best"], [], None, 0, + _Option(["-best","best"], "You can print out all comparisons instead of just the best"), - _Option(["-space","space"], [], None, 0, + _Option(["-space","space"], "for linear-space recursion."), - _Option(["-shuffle","shuffle"], [], None, 0, + _Option(["-shuffle","shuffle"], "Shuffle"), - _Option(["-seed","seed"], [], None, 0, + _Option(["-seed","seed"], "Random number seed"), - _Option(["-align","align"], [], None, 0, + _Option(["-align","align"], "Show the alignment."), - _Option(["-width","width"], [], None, 0, + _Option(["-width","width"], "Alignment width") ] _EmbossCommandLine.__init__(self, cmd, **kwargs) @@ -1077,19 +1139,23 @@ class ETandemCommandline(_EmbossCommandLine): """ def __init__(self, cmd="etandem", **kwargs): self.parameters = [ - _Option(["-sequence","sequence"], ["file"], None, 1, - "Sequence"), - _Option(["-minrepeat","minrepeat"], [], None, 1, - "Minimum repeat size"), - _Option(["-maxrepeat","maxrepeat"], [], None, 1, - "Maximum repeat size"), - _Option(["-threshold","threshold"], [], None, 0, + _Option(["-sequence","sequence"], + "Sequence", + types=["file"], + is_required=True), + _Option(["-minrepeat","minrepeat"], + "Minimum repeat size", + is_required=True), + _Option(["-maxrepeat","maxrepeat"], + "Maximum repeat size", + is_required=True), + _Option(["-threshold","threshold"], "Threshold score"), - _Option(["-mismatch","mismatch"], [], None, 0, + _Option(["-mismatch","mismatch"], "Allow N as a mismatch"), - _Option(["-uniform","uniform"], [], None, 0, + _Option(["-uniform","uniform"], "Allow uniform consensus"), - _Option(["-rformat","rformat"], [], None, 0, + _Option(["-rformat","rformat"], "Output report format")] _EmbossCommandLine.__init__(self, cmd, **kwargs) @@ -1099,17 +1165,24 @@ class EInvertedCommandline(_EmbossCommandLine): """ def __init__(self, cmd="einverted", **kwargs): self.parameters = [ - _Option(["-sequence","sequence"], ["file"], None, 1, - 
"Sequence"), - _Option(["-gap","gap"], ["file"], None, 1, - "Gap penalty"), - _Option(["-threshold","threshold"], [], None, 1, - "Minimum score threshold"), - _Option(["-match","match"], [], None, 1, - "Match score"), - _Option(["-mismatch","mismatch"], [], None, 1, - "Mismatch score"), - _Option(["-maxrepeat","maxrepeat"], [], None, 0, + _Option(["-sequence","sequence"], + "Sequence", + types=["file"], + is_required=True), + _Option(["-gap","gap"], + "Gap penalty", + types=["file"], + is_required=True), + _Option(["-threshold","threshold"], + "Minimum score threshold", + is_required=True), + _Option(["-match","match"], + "Match score", + is_required=True), + _Option(["-mismatch","mismatch"], + "Mismatch score", + is_required=True), + _Option(["-maxrepeat","maxrepeat"], "Maximum separation between the start and end of repeat"), ] _EmbossCommandLine.__init__(self, cmd, **kwargs) @@ -1120,18 +1193,25 @@ class PalindromeCommandline(_EmbossCommandLine): """ def __init__(self, cmd="palindrome", **kwargs): self.parameters = [ - _Option(["-sequence","sequence"], ["file"], None, 1, - "Sequence"), - _Option(["-minpallen","minpallen"], [], None, 1, - "Minimum palindrome length"), - _Option(["-maxpallen","maxpallen"], [], None, 1, - "Maximum palindrome length"), - _Option(["-gaplimit","gaplimit"], [], None, 1, - "Maximum gap between repeats"), - _Option(["-nummismatches","nummismatches"], [], None, 1, - "Number of mismatches allowed"), - _Option(["-overlap","overlap"], [], None, 1, - "Report overlapping matches"), + _Option(["-sequence","sequence"], + "Sequence", + types=["file"], + is_required=True), + _Option(["-minpallen","minpallen"], + "Minimum palindrome length", + is_required=True), + _Option(["-maxpallen","maxpallen"], + "Maximum palindrome length", + is_required=True), + _Option(["-gaplimit","gaplimit"], + "Maximum gap between repeats", + is_required=True), + _Option(["-nummismatches","nummismatches"], + "Number of mismatches allowed", + is_required=True), + 
_Option(["-overlap","overlap"], + "Report overlapping matches", + is_required=True), ] _EmbossCommandLine.__init__(self, cmd, **kwargs) @@ -1141,13 +1221,19 @@ class TranalignCommandline(_EmbossCommandLine): """ def __init__(self, cmd="tranalign", **kwargs): self.parameters = [ - _Option(["-asequence","asequence"], ["file"], None, 1, - "Nucleotide sequences to be aligned."), - _Option(["-bsequence","bsequence"], ["file"], None, 1, - "Protein sequence alignment"), - _Option(["-outseq","outseq"], ["file"], None, 1, - "Output sequence file."), - _Option(["-table","table"], [], None, 0, + _Option(["-asequence","asequence"], + "Nucleotide sequences to be aligned.", + types=["file"], + is_required=True), + _Option(["-bsequence","bsequence"], + "Protein sequence alignment", + types=["file"], + is_required=True), + _Option(["-outseq","outseq"], + "Output sequence file.", + types=["file"], + is_required=True), + _Option(["-table","table"], "Code to use")] _EmbossCommandLine.__init__(self, cmd, **kwargs) @@ -1157,17 +1243,26 @@ class DiffseqCommandline(_EmbossCommandLine): """ def __init__(self, cmd="diffseq", **kwargs): self.parameters = [ - _Option(["-asequence","asequence"], ["file"], None, 1, - "First sequence to compare"), - _Option(["-bsequence","bsequence"], ["file"], None, 1, - "Second sequence to compare"), - _Option(["-wordsize","wordsize"], [], None, 1, - "Word size to use for comparisons (10 default)"), - _Option(["-aoutfeat","aoutfeat"], ["file"], None, 1, - "File for output of first sequence's features"), - _Option(["-boutfeat","boutfeat"], ["file"], None, 1, - "File for output of second sequence's features"), - _Option(["-rformat","rformat"], ["output"], None, 0, + _Option(["-asequence","asequence"], + "First sequence to compare", + types=["file"], + is_required=True), + _Option(["-bsequence","bsequence"], + "Second sequence to compare", + types=["file"], + is_required=True), + _Option(["-wordsize","wordsize"], + "Word size to use for comparisons (10 
default)", + is_required=True), + _Option(["-aoutfeat","aoutfeat"], + "File for output of first sequence's features", + types=["file"], + is_required=True), + _Option(["-boutfeat","boutfeat"], + "File for output of second sequence's features", + types=["file"], + is_required=True), + _Option(["-rformat","rformat"], "Output report file format") ] _EmbossCommandLine.__init__(self, cmd, **kwargs) @@ -1178,12 +1273,18 @@ class IepCommandline(_EmbossCommandLine): """ def __init__(self, cmd="iep", **kwargs): self.parameters = [ - _Option(["-sequence","sequence"], ["file"], None, 1, - "Protein sequence(s) filename"), - _Option(["-amino","amino"], [], None, 0), - _Option(["-lysinemodified","lysinemodified"], [], None, 0), - _Option(["-disulphides","disulphides"], [], None, 0), - _Option(["-notermini","notermini"], [], None, 0), + _Option(["-sequence","sequence"], + "Protein sequence(s) filename", + types=["file"], + is_required=True), + _Option(["-amino","amino"], + "Amino acid"), + _Option(["-lysinemodified","lysinemodified"], + ""), #TODO + _Option(["-disulphides","disulphides"], + ""), #TODO + _Option(["-notermini","notermini"], + "") #TODO ] _EmbossCommandLine.__init__(self, cmd, **kwargs) @@ -1202,13 +1303,15 @@ class SeqretCommandline(_EmbossMinimalCommandLine): """ def __init__(self, cmd="seqret", **kwargs): self.parameters = [ - _Option(["-sequence","sequence"], ["file"], None, 0, - "Input sequence(s) filename"), - _Option(["-outseq","outseq"], ["file"], None, 0, - "Output sequence file."), - _Option(["-sformat","sformat"], [], None, 0, + _Option(["-sequence","sequence"], + "Input sequence(s) filename", + types=["file"]), + _Option(["-outseq","outseq"], + "Output sequence file.", + types=["file"]), + _Option(["-sformat","sformat"], "Input sequence(s) format (e.g. fasta, genbank)"), - _Option(["-osformat","osformat"], [], None, 0, + _Option(["-osformat","osformat"], "Output sequence(s) format (e.g. 
fasta, genbank)"), ] _EmbossMinimalCommandLine.__init__(self, cmd, **kwargs) @@ -1239,11 +1342,13 @@ class SeqmatchallCommandline(_EmbossCommandLine): """ def __init__(self, cmd="seqmatchall", **kwargs): self.parameters = [ - _Option(["-sequence", "sequence"], ["file"], - None, 1, "Readable set of sequences"), - _Option(["-wordsize", "wordsize"], [], - None, 0, "Word size (Integer 2 or more, default 4)"), - _Option(["-aformat","aformat"], [], None, 0, + _Option(["-sequence", "sequence"], + "Readable set of sequences", + types=["file"], + is_required=True), + _Option(["-wordsize", "wordsize"], + "Word size (Integer 2 or more, default 4)"), + _Option(["-aformat","aformat"], "Display output in a different specified output format"), ] _EmbossCommandLine.__init__(self, cmd, **kwargs) diff --git a/Bio/PopGen/GenePop/Controller.py b/Bio/PopGen/GenePop/Controller.py index b6ac7f9a98f..e3700966912 100644 --- a/Bio/PopGen/GenePop/Controller.py +++ b/Bio/PopGen/GenePop/Controller.py @@ -169,54 +169,27 @@ class _GenePopCommandline(AbstractCommandline): def __init__(self, genepop_dir=None, cmd='Genepop', **kwargs): self.parameters = [ _Argument(["command"], - [], - None, - True, - "GenePop option to be called"), + "GenePop option to be called", + is_required=True), _Argument(["mode"], - [], - None, - True, - "Should allways be batch"), + "Should allways be batch", + is_required=True), _Argument(["input"], - [], - None, - True, - "Input file"), + "Input file", + is_required=True), _Argument(["Dememorization"], - [], - None, - False, "Dememorization step"), _Argument(["BatchNumber"], - [], - None, - False, "Number of MCMC batches"), _Argument(["BatchLength"], - [], - None, - False, "Length of MCMC chains"), _Argument(["HWtests"], - [], - None, - False, "Enumeration or MCMC"), _Argument(["IsolBDstatistic"], - [], - None, - False, "IBD statistic (a or e)"), _Argument(["MinimalDistance"], - [], - None, - False, "Minimal IBD distance"), _Argument(["GeographicScale"], - [], - None, - 
False, "Log or Linear"), ] AbstractCommandline.__init__(self, cmd, **kwargs) diff --git a/Bio/Sequencing/Applications/_Novoalign.py b/Bio/Sequencing/Applications/_Novoalign.py index 4f3e13147b6..180f48996a1 100644 --- a/Bio/Sequencing/Applications/_Novoalign.py +++ b/Bio/Sequencing/Applications/_Novoalign.py @@ -1,4 +1,5 @@ # Copyright 2009 by Osvaldo Zagordi. All rights reserved. +# Revisions copyright 2010 by Peter Cock. # This code is part of the Biopython distribution and governed by its # license. Please see the LICENSE file that should have been included # as part of this package. @@ -42,129 +43,132 @@ def __init__(self, cmd="novoalign", **kwargs): self.parameters = \ [ - _Option(["-d", "database"], ["file"], - None, 0, "database filename", - 0), - _Option(["-f", "readfile"], ["file"], - None, 0, "read file", - 0), - _Option(["-F", "format"], [], - lambda x: x in READ_FORMAT, - 0, "Format of read files.\n\nAllowed values: %s" % ", ".join(READ_FORMAT), - 0), + _Option(["-d", "database"], + "database filename", + types=["file"], + equate=False), + _Option(["-f", "readfile"], + "read file", + types=["file"], + equate=False), + _Option(["-F", "format"], + "Format of read files.\n\nAllowed values: %s" \ + % ", ".join(READ_FORMAT), + checker_function=lambda x: x in READ_FORMAT, + equate=False), # Alignment scoring options - _Option(["-t", "threshold"], [], - lambda x: isinstance(x, types.IntType), - 0, "Threshold for alignment score", - 0), - _Option(["-g", "gap_open"], [], - lambda x: isinstance(x, types.IntType), - 0, "Gap opening penalty [default: 40]", - 0), - _Option(["-x", "gap_extend"], [], - lambda x: isinstance(x, types.IntType), - 0, "Gap extend penalty [default: 15]", - 0), - _Option(["-u", "unconverted"], [], - lambda x: isinstance(x, types.IntType), 0, + _Option(["-t", "threshold"], + "Threshold for alignment score", + checker_function=lambda x: isinstance(x, types.IntType), + equate=False), + _Option(["-g", "gap_open"], + "Gap opening penalty 
[default: 40]", + checker_function=lambda x: isinstance(x, types.IntType), + equate=False), + _Option(["-x", "gap_extend"], + "Gap extend penalty [default: 15]", + checker_function=lambda x: isinstance(x, types.IntType), + equate=False), + _Option(["-u", "unconverted"], "Experimental: unconverted cytosines penalty in bisulfite mode\n\n" "Default: no penalty", - 0), + checker_function=lambda x: isinstance(x, types.IntType), + equate=False), # Quality control and read filtering - _Option(["-l", "good_bases"], [], - lambda x: isinstance(x, types.IntType), - 0, "Minimum number of good quality bases [default: log(N_g, 4) + 5]", - 0), - _Option(["-h", "homopolymer"], [], - lambda x: isinstance(x, types.IntType), - 0, "Homopolymer read filter [default: 20; disable: negative value]", - 0), + _Option(["-l", "good_bases"], + "Minimum number of good quality bases [default: log(N_g, 4) + 5]", + checker_function=lambda x: isinstance(x, types.IntType), + equate=False), + _Option(["-h", "homopolymer"], + "Homopolymer read filter [default: 20; disable: negative value]", + checker_function=lambda x: isinstance(x, types.IntType), + equate=False), # Read preprocessing options - _Option(["-a", "adapter3"], [], - lambda x: isinstance(x, types.StringType), - 0, "Strips a 3' adapter sequence prior to alignment.\n\n" + _Option(["-a", "adapter3"], + "Strips a 3' adapter sequence prior to alignment.\n\n" "With paired ends two adapters can be specified", - 0), - _Option(["-n", "truncate"], [], - lambda x: isinstance(x, types.IntType), - 0, "Truncate to specific length before alignment", - 0), - _Option(["-s", "trimming"], [], - lambda x: isinstance(x, types.IntType), - 0, "If fail to align, trim by s bases until they map or become shorter than l.\n\n" + checker_function=lambda x: isinstance(x, types.StringType), + equate=False), + _Option(["-n", "truncate"], + "Truncate to specific length before alignment", + checker_function=lambda x: isinstance(x, types.IntType), + equate=False), + 
_Option(["-s", "trimming"], + "If fail to align, trim by s bases until they map or become shorter than l.\n\n" "Ddefault: 2", - 0), - _Option(["-5", "adapter5"], [], - lambda x: isinstance(x, types.StringType), - 0, "Strips a 5' adapter sequence.\n\n" + checker_function=lambda x: isinstance(x, types.IntType), + equate=False), + _Option(["-5", "adapter5"], + "Strips a 5' adapter sequence.\n\n" "Similar to -a (adaptor3), but on the 5' end.", - 0), + checker_function=lambda x: isinstance(x, types.StringType), + equate=False), # Reporting options - _Option(["-o", "report"], [], - lambda x: x in REPORT_FORMAT, - 0, "Specifies the report format.\n\nAllowed values: %s\nDefault: Native" \ + _Option(["-o", "report"], + "Specifies the report format.\n\nAllowed values: %s\nDefault: Native" \ % ", ".join(REPORT_FORMAT), - 0), - _Option(["-Q", "quality"], [], - lambda x: isinstance(x, types.IntType), - 0, "Lower threshold for an alignment to be reported [default: 0]", - 0), - _Option(["-R", "repeats"], [], - lambda x: isinstance(x, types.IntType), - 0, "If score difference is higher, report repeats.\n\n" + checker_function=lambda x: x in REPORT_FORMAT, + equate=False), + _Option(["-Q", "quality"], + "Lower threshold for an alignment to be reported [default: 0]", + checker_function=lambda x: isinstance(x, types.IntType), + equate=False), + _Option(["-R", "repeats"], + "If score difference is higher, report repeats.\n\n" "Otherwise -r read method applies [default: 5]", - 0), - _Option(["-r", "r_method"], [], - lambda x: x.split()[0] in REPEAT_METHOD, - 0, "Methods to report reads with multiple matches.\n\n" + checker_function=lambda x: isinstance(x, types.IntType), + equate=False), + _Option(["-r", "r_method"], + "Methods to report reads with multiple matches.\n\n" "Allowed values: %s\n" "'All' and 'Exhaustive' accept limits." 
\ % ", ".join(REPEAT_METHOD), - 0), - _Option(["-e", "recorded"], [], - lambda x: isinstance(x, types.IntType), - 0, "Alignments recorded with score equal to the best.\n\n" + checker_function=lambda x: x.split()[0] in REPEAT_METHOD, + equate=False), + _Option(["-e", "recorded"], + "Alignments recorded with score equal to the best.\n\n" "Default: 1000 in default read method, otherwise no limit.", - 0), - _Option(["-q", "qual_digits"], [], - lambda x: isinstance(x, types.IntType), - 0, "Decimal digits for quality scores [default: 0]", - 0), + checker_function=lambda x: isinstance(x, types.IntType), + equate=False), + _Option(["-q", "qual_digits"], + "Decimal digits for quality scores [default: 0]", + checker_function=lambda x: isinstance(x, types.IntType), + equate=False), # Paired end options - _Option(["-i", "fragment"], [], - lambda x: len(x.split()) == 2, - 0, "Fragment length (2 reads + insert) and standard deviation [default: 250 30]", - 0), - _Option(["-v", "variation"], [], - lambda x: isinstance(x, types.IntType), - 0, "Structural variation penalty [default: 70]", - 0), + _Option(["-i", "fragment"], + "Fragment length (2 reads + insert) and standard deviation [default: 250 30]", + checker_function=lambda x: len(x.split()) == 2, + equate=False), + _Option(["-v", "variation"], + "Structural variation penalty [default: 70]", + checker_function=lambda x: isinstance(x, types.IntType), + equate=False), # miRNA mode - _Option(["-m", "miRNA"], [], - lambda x: isinstance(x, types.IntType), - 0, "Sets miRNA mode and optionally sets a value for the region scanned [default: off]", - 0), + _Option(["-m", "miRNA"], + "Sets miRNA mode and optionally sets a value for the region scanned [default: off]", + checker_function=lambda x: isinstance(x, types.IntType), + equate=False), # Multithreading - _Option(["-c", "cores"], [], - lambda x: isinstance(x, types.IntType), - 0, "Number of threads, disabled on free versions [default: number of cores]", - 0), + _Option(["-c", 
"cores"], + "Number of threads, disabled on free versions [default: number of cores]", + checker_function=lambda x: isinstance(x, types.IntType), + equate=False), # Quality calibrations - _Option(["-k", "read_cal"], [], - lambda x: isinstance(x, types.StringType), - 0, "Read quality calibration from file (mismatch counts)", - 0), - _Option(["-K", "write_cal"], [], - lambda x: isinstance(x, types.StringType), - 0, "Accumulate mismatch counts and write to file", - 0) + _Option(["-k", "read_cal"], + "Read quality calibration from file (mismatch counts)", + checker_function=lambda x: isinstance(x, types.StringType), + equate=False), + _Option(["-K", "write_cal"], + "Accumulate mismatch counts and write to file", + checker_function=lambda x: isinstance(x, types.StringType), + equate=False), ] AbstractCommandline.__init__(self, cmd, **kwargs)