From d058adf7e3752f01ca056f23fad00e2f574c051a Mon Sep 17 00:00:00 2001
From: Brian Claywell <bclaywel@fhcrc.org>
Date: Mon, 13 Jul 2015 18:07:22 -0700
Subject: [PATCH] Replace program output plugin with hard text.

I've also included a small script to help with regenerating the `.help`
files before a release. Fixes #43.
---
 doc-requirements.txt      |   1 -
 docs/backtrans_align.help |  22 ++++++
 docs/backtrans_align.rst  |   2 +-
 docs/conf.py              |   2 +-
 docs/convert.help         | 156 ++++++++++++++++++++++++++++++++++++++
 docs/convert_mogrify.rst  |   2 +-
 docs/extract_ids.help     |  18 +++++
 docs/extract_ids.rst      |   3 +-
 docs/info.help            |  22 ++++++
 docs/info.rst             |   2 +-
 docs/primer_trim.help     |  41 ++++++++++
 docs/primer_trim.rst      |   2 +-
 docs/quality_filter.help  |  79 +++++++++++++++++++
 docs/quality_filter.rst   |   2 +-
 docs/regen_help.sh        |   7 ++
 15 files changed, 352 insertions(+), 9 deletions(-)
 create mode 100644 docs/backtrans_align.help
 create mode 100644 docs/convert.help
 create mode 100644 docs/extract_ids.help
 create mode 100644 docs/info.help
 create mode 100644 docs/primer_trim.help
 create mode 100644 docs/quality_filter.help
 create mode 100755 docs/regen_help.sh

diff --git a/doc-requirements.txt b/doc-requirements.txt
index 43a5484..d903aea 100644
--- a/doc-requirements.txt
+++ b/doc-requirements.txt
@@ -1,3 +1,2 @@
 biopython>=1.58
 sphinx>=1.1
-sphinxcontrib-programoutput==0.8
diff --git a/docs/backtrans_align.help b/docs/backtrans_align.help
new file mode 100644
index 0000000..9d0d549
--- /dev/null
+++ b/docs/backtrans_align.help
@@ -0,0 +1,22 @@
+usage: seqmagick backtrans-align [-h] [-o destination_file]
+                                 [-t {standard-ambiguous,vertebrate-mito,standard}]
+                                 [-a {fail,warn,none}]
+                                 protein_align nucl_align
+
+Given a protein alignment and unaligned nucleotides, align the nucleotides
+using the protein alignment. Protein and nucleotide sequence files must
+contain the same number of sequences, in the same order, with the same IDs.
+
+positional arguments:
+  protein_align         Protein Alignment
+  nucl_align            FASTA Alignment
+
+optional arguments:
+  -h, --help            show this help message and exit
+  -o destination_file, --out-file destination_file
+                        Output destination. Default: STDOUT
+  -t {standard-ambiguous,vertebrate-mito,standard}, --translation-table {standard-ambiguous,vertebrate-mito,standard}
+                        Translation table to use. [Default: standard-
+                        ambiguous]
+  -a {fail,warn,none}, --fail-action {fail,warn,none}
+                        Action to take on an ambiguous codon [default: fail]
diff --git a/docs/backtrans_align.rst b/docs/backtrans_align.rst
index 49b0c36..4862cac 100644
--- a/docs/backtrans_align.rst
+++ b/docs/backtrans_align.rst
@@ -5,4 +5,4 @@ Given a protein alignment and unaligned nucleotides, align the nucleotides
 using the protein alignment.  Protein and nucleotide sequence files must
 contain the same number of sequences, in the same order, with the same IDs.
 
-.. program-output:: ../seqmagick.py backtrans-align -h
+.. literalinclude:: backtrans_align.help
diff --git a/docs/conf.py b/docs/conf.py
index 676085e..1c12ad1 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -25,7 +25,7 @@
 
 # Add any Sphinx extension module names here, as strings. They can be extensions
 # coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
-extensions = ['sphinxcontrib.programoutput']
+extensions = []
 
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ['_templates']
diff --git a/docs/convert.help b/docs/convert.help
new file mode 100644
index 0000000..b2b9e5c
--- /dev/null
+++ b/docs/convert.help
@@ -0,0 +1,156 @@
+usage: seqmagick convert [-h] [--line-wrap N]
+                         [--sort {length-asc,length-desc,name-asc,name-desc}]
+                         [--apply-function /path/to/module.py:function_name[:parameter]]
+                         [--cut start:end[,start2:end2]] [--relative-to ID]
+                         [--drop start:end[,start2:end2]] [--dash-gap]
+                         [--lower] [--mask start1:end1[,start2:end2]]
+                         [--reverse] [--reverse-complement] [--squeeze]
+                         [--squeeze-threshold PROP]
+                         [--transcribe {dna2rna,rna2dna}]
+                         [--translate {dna2protein,rna2protein,dna2proteinstop,rna2proteinstop}]
+                         [--ungap] [--upper] [--deduplicate-sequences]
+                         [--deduplicated-sequences-file FILE]
+                         [--deduplicate-taxa] [--exclude-from-file FILE]
+                         [--include-from-file FILE] [--head N]
+                         [--max-length N] [--min-length N]
+                         [--min-ungapped-length N] [--pattern-include REGEX]
+                         [--pattern-exclude REGEX] [--prune-empty]
+                         [--sample N] [--seq-pattern-include REGEX]
+                         [--seq-pattern-exclude REGEX] [--tail N]
+                         [--first-name] [--name-suffix SUFFIX]
+                         [--name-prefix PREFIX]
+                         [--pattern-replace search_pattern replace_pattern]
+                         [--strip-range] [--input-format FORMAT]
+                         [--output-format FORMAT]
+                         [--alphabet {protein,dna,dna-ambiguous,rna,rna-ambiguous}]
+                         source_file dest_file
+
+Convert between sequence formats
+
+positional arguments:
+  source_file           Input sequence file
+  dest_file             Output file
+
+optional arguments:
+  -h, --help            show this help message and exit
+  --alphabet {protein,dna,dna-ambiguous,rna,rna-ambiguous}
+                        Input alphabet. Required for writing NEXUS.
+
+Sequence File Modification:
+  --line-wrap N         Adjust line wrap for sequence strings. When N is 0,
+                        all line breaks are removed. Only fasta files are
+                        supported for the output format.
+  --sort {length-asc,length-desc,name-asc,name-desc}
+                        Perform sorting by length or name, ascending or
+                        descending. ASCII sorting is performed for names
+
+Sequence Modification:
+  --apply-function /path/to/module.py:function_name[:parameter]
+                        Specify a custom function to apply to the input
+                        sequences, specified as
+                        /path/to/file.py:function_name. Function should accept
+                        an iterable of Bio.SeqRecord objects, and yield
+                        SeqRecords. If the parameter is specified, it will be
+                        passed as a string as the second argument to the
+                        function. Specify more than one to chain.
+  --cut start:end[,start2:end2]
+                        Keep only the residues within the 1-indexed start and
+                        end positions specified, : separated. Includes last
+                        item. Start or end can be left unspecified to indicate
+                        start/end of sequence. A negative start may be
+                        provided to indicate an offset from the end of the
+                        sequence. Note that to prevent negative numbers being
+                        interpreted as flags, this should be written with an
+                        equals sign between `--cut` and the argument, e.g.:
+                        `--cut=-10:`
+  --relative-to ID      Apply --cut relative to the indexes of non-gap
+                        residues in sequence identified by ID
+  --drop start:end[,start2:end2]
+                        Remove the residues at the specified indices. Same
+                        format as `--cut`.
+  --dash-gap            Replace any of the characters "?.:~" with a "-" for
+                        all sequences
+  --lower               Translate the sequences to lower case
+  --mask start1:end1[,start2:end2]
+                        Replace residues in 1-indexed slice with gap-
+                        characters. If --relative-to is also specified,
+                        coordinates are relative to the sequence ID provided.
+  --reverse             Reverse the order of sites in sequences
+  --reverse-complement  Convert sequences into reverse complements
+  --squeeze             Remove any gaps that are present in the same position
+                        across all sequences in an alignment (equivalent to
+                        --squeeze-threshold=1.0)
+  --squeeze-threshold PROP
+                        Trim columns from an alignment which have gaps in at
+                        least the specified proportion of sequences.
+  --transcribe {dna2rna,rna2dna}
+                        Transcription and back transcription for generic DNA
+                        and RNA. Source sequences must be the correct alphabet
+                        or this action will likely produce incorrect results.
+  --translate {dna2protein,rna2protein,dna2proteinstop,rna2proteinstop}
+                        Translate from generic DNA/RNA to proteins. Options
+                        with "stop" suffix will NOT translate through stop
+                        codons. Source sequences must be the correct alphabet
+                        or this action will likely produce incorrect results.
+  --ungap               Remove gaps in the sequence alignment
+  --upper               Translate the sequences to upper case
+
+Record Selection:
+  --deduplicate-sequences
+                        Remove any duplicate sequences by sequence content,
+                        keep the first instance seen
+  --deduplicated-sequences-file FILE
+                        Write all of the deduplicated sequences to a file
+  --deduplicate-taxa    Remove any duplicate sequences by ID, keep the first
+                        instance seen
+  --exclude-from-file FILE
+                        Filter sequences, removing those sequence IDs in the
+                        specified file
+  --include-from-file FILE
+                        Filter sequences, keeping only those sequence IDs in
+                        the specified file
+  --head N              Trim down to top N sequences. With the leading `-',
+                        print all but the last N sequences.
+  --max-length N        Discard any sequences beyond the specified maximum
+                        length. This operation occurs *before* all length-
+                        changing options such as cut and squeeze.
+  --min-length N        Discard any sequences less than the specified minimum
+                        length. This operation occurs *before* cut and
+                        squeeze.
+  --min-ungapped-length N
+                        Discard any sequences less than the specified minimum
+                        length, excluding gaps. This operation occurs *before*
+                        cut and squeeze.
+  --pattern-include REGEX
+                        Filter the sequences by regular expression in ID or
+                        description
+  --pattern-exclude REGEX
+                        Filter the sequences by regular expression in ID or
+                        description
+  --prune-empty         Prune sequences containing only gaps ('-')
+  --sample N            Select a random sampling of sequences
+  --seq-pattern-include REGEX
+                        Filter the sequences by regular expression in sequence
+  --seq-pattern-exclude REGEX
+                        Filter the sequences by regular expression in sequence
+  --tail N              Trim down to bottom N sequences. Use +N to output
+                        sequences starting with the Nth.
+
+Sequence ID Modification:
+  --first-name          Take only the first whitespace-delimited word as the
+                        name of the sequence
+  --name-suffix SUFFIX  Append a suffix to all IDs.
+  --name-prefix PREFIX  Insert a prefix for all IDs.
+  --pattern-replace search_pattern replace_pattern
+                        Replace regex pattern "search_pattern" with
+                        "replace_pattern" in sequence ID and description
+  --strip-range         Strip ranges from sequence IDs, matching </x-y>
+
+Format Options:
+  --input-format FORMAT
+                        Input file format (default: determine from extension)
+  --output-format FORMAT
+                        Output file format (default: determine from extension)
+
+Filters using regular expressions are case-sensitive by default. Append "(?i)"
+to a pattern to make it case-insensitive.
diff --git a/docs/convert_mogrify.rst b/docs/convert_mogrify.rst
index 17dbc94..cdfb495 100644
--- a/docs/convert_mogrify.rst
+++ b/docs/convert_mogrify.rst
@@ -71,6 +71,6 @@ You can even define your own functions in python and use them via
 Command-line Arguments
 **********************
 
-.. program-output:: ../seqmagick.py convert -h
+.. literalinclude:: convert.help
 
 .. _`BioPython SeqIO wiki page`: http://www.biopython.org/wiki/SeqIO#File_Formats
diff --git a/docs/extract_ids.help b/docs/extract_ids.help
new file mode 100644
index 0000000..5417806
--- /dev/null
+++ b/docs/extract_ids.help
@@ -0,0 +1,18 @@
+usage: seqmagick extract-ids [-h] [-o OUTPUT_FILE]
+                             [--input-format INPUT_FORMAT] [-d]
+                             sequence_file
+
+Extract the sequence IDs from a file
+
+positional arguments:
+  sequence_file         Sequence file
+
+optional arguments:
+  -h, --help            show this help message and exit
+  -o OUTPUT_FILE, --output-file OUTPUT_FILE
+                        Destination trimmed file
+  --input-format INPUT_FORMAT
+                        Input format for sequence file
+  -d, --include-description
+                        Include the sequence description in output [default:
+                        False]
diff --git a/docs/extract_ids.rst b/docs/extract_ids.rst
index 9208f2e..b557402 100644
--- a/docs/extract_ids.rst
+++ b/docs/extract_ids.rst
@@ -4,5 +4,4 @@
 ``seqmagick extract-ids`` is extremely simple - all the IDs from a sequence file
 are printed to stdout (by default) or the file of your choosing:
 
-.. program-output:: ../seqmagick.py extract-ids -h
-
+.. literalinclude:: extract_ids.help
diff --git a/docs/info.help b/docs/info.help
new file mode 100644
index 0000000..31e42a7
--- /dev/null
+++ b/docs/info.help
@@ -0,0 +1,22 @@
+usage: seqmagick info [-h] [--input-format INPUT_FORMAT]
+                      [--out-file destination_file] [--format {tab,csv,align}]
+                      [--threads THREADS]
+                      sequence_files [sequence_files ...]
+
+Info action
+
+positional arguments:
+  sequence_files
+
+optional arguments:
+  -h, --help            show this help message and exit
+  --input-format INPUT_FORMAT
+                        Input format. Overrides extension for all input files
+  --out-file destination_file
+                        Output destination. Default: STDOUT
+  --format {tab,csv,align}
+                        Specify output format as tab-delimited, CSV or aligned
+                        in a borderless table. Default is tab-delimited if the
+                        output is directed to a file, aligned if output to the
+                        console.
+  --threads THREADS     Number of threads (CPUs). [1]
diff --git a/docs/info.rst b/docs/info.rst
index 97698ae..efbe656 100644
--- a/docs/info.rst
+++ b/docs/info.rst
@@ -21,4 +21,4 @@ Output can be in comma-separated, tab-separated, or aligned formats. See
 
 Usage:
 
-.. program-output:: ../seqmagick.py info -h
+.. literalinclude:: info.help
diff --git a/docs/primer_trim.help b/docs/primer_trim.help
new file mode 100644
index 0000000..66ab5cd
--- /dev/null
+++ b/docs/primer_trim.help
@@ -0,0 +1,41 @@
+usage: seqmagick primer-trim [-h] [--reverse-is-revcomp]
+                             [--source-format SOURCE_FORMAT]
+                             [--output-format OUTPUT_FORMAT]
+                             [--include-primers]
+                             [--max-hamming-distance MAX_HAMMING_DISTANCE]
+                             [--prune-action {trim,isolate}]
+                             source_file output_file forward_primer
+                             reverse_primer
+
+Find a primer sequence in a gapped alignment, trim to amplicon
+
+positional arguments:
+  source_file           Source alignment file
+  output_file           Destination trimmed file
+  forward_primer        The forward primer used
+  reverse_primer        The reverse primer used. By default the reverse primer
+                        is assumed to be a subsequence of the top strand (that
+                        is, the reverse complement of an actual downstream PCR
+                        primer). Use --reverse-is-revcomp if this is not the
+                        case.
+
+optional arguments:
+  -h, --help            show this help message and exit
+  --reverse-is-revcomp  Reverse primer is written as the reverse complement of
+                        the top strand (default: False)
+  --source-format SOURCE_FORMAT
+                        Alignment format (default: detect from extension)
+  --output-format OUTPUT_FORMAT
+                        Alignment format (default: detect from extension)
+  --include-primers     Include the primers in the output (default: False)
+  --max-hamming-distance MAX_HAMMING_DISTANCE
+                        Maximum Hamming distance between primer and alignment
+                        site (default: 1). IUPAC ambiguous bases in the primer
+                        matching unambiguous bases in the alignment are not
+                        penalized
+  --prune-action {trim,isolate}
+                        Action to take. Options are trim (trim to the region
+                        defined by the two primers, decreasing the width of
+                        the alignment), or isolate (convert all characters
+                        outside the primer-defined area to gaps). default:
+                        trim
diff --git a/docs/primer_trim.rst b/docs/primer_trim.rst
index 560a77a..b212d9b 100644
--- a/docs/primer_trim.rst
+++ b/docs/primer_trim.rst
@@ -4,4 +4,4 @@
 ``primer-trim`` trims an alignment to a region defined by a set of forward and
 reverse primers. Usage is as follows:
 
-.. program-output:: ../seqmagick.py primer-trim -h
+.. literalinclude:: primer_trim.help
diff --git a/docs/quality_filter.help b/docs/quality_filter.help
new file mode 100644
index 0000000..3c95673
--- /dev/null
+++ b/docs/quality_filter.help
@@ -0,0 +1,79 @@
+usage: seqmagick quality-filter [-h] [--input-qual INPUT_QUAL]
+                                [--report-out REPORT_OUT]
+                                [--details-out DETAILS_OUT]
+                                [--no-details-comment]
+                                [--min-mean-quality QUALITY]
+                                [--min-length LENGTH] [--max-length LENGTH]
+                                [--quality-window-mean-qual QUALITY_WINDOW_MEAN_QUAL]
+                                [--quality-window-prop QUALITY_WINDOW_PROP]
+                                [--quality-window WINDOW_SIZE]
+                                [--ambiguous-action {truncate,drop}]
+                                [--max-ambiguous MAX_AMBIGUOUS]
+                                [--primer PRIMER | --no-primer]
+                                [--barcode-file BARCODE_FILE]
+                                [--barcode-header] [--map-out SAMPLE_MAP]
+                                [--quoting {QUOTE_ALL,QUOTE_MINIMAL,QUOTE_NONE,QUOTE_NONNUMERIC}]
+                                input_fastq output_file
+
+Filter reads based on quality scores
+
+positional arguments:
+  input_fastq           Input fastq file. A fasta-format file may also be
+                        provided if --input-qual is also specified.
+  output_file           Output file. Format determined from extension.
+
+optional arguments:
+  -h, --help            show this help message and exit
+  --input-qual INPUT_QUAL
+                        The quality scores associated with the input file.
+                        Only used if input file is fasta.
+  --min-mean-quality QUALITY
+                        Minimum mean quality score for each read [default:
+                        25.0]
+  --min-length LENGTH   Minimum length to keep sequence [default: 200]
+  --max-length LENGTH   Maximum length to keep before truncating [default:
+                        1000]. This operation occurs before --max-ambiguous
+  --ambiguous-action {truncate,drop}
+                        Action to take on ambiguous base in sequence (N's).
+                        [default: no action]
+  --max-ambiguous MAX_AMBIGUOUS
+                        Maximum number of ambiguous bases in a sequence.
+                        Sequences exceeding this count will be removed.
+
+Output:
+  --report-out REPORT_OUT
+                        Output file for report [default: stdout]
+  --details-out DETAILS_OUT
+                        Output file to report fate of each sequence
+  --no-details-comment  Do not write comment lines with version and call to
+                        start --details-out
+
+Quality window options:
+  --quality-window-mean-qual QUALITY_WINDOW_MEAN_QUAL
+                        Minimum quality score within the window defined by
+                        --quality-window. [default: same as --min-mean-
+                        quality]
+  --quality-window-prop QUALITY_WINDOW_PROP
+                        Proportion of reads within quality window that must
+                        pass filter. [default: 1.0]
+  --quality-window WINDOW_SIZE
+                        Window size for truncating sequences. When set to a
+                        non-zero value, sequences are truncated where the mean
+                        quality within the window drops below --min-mean-
+                        quality. [default: 0]
+
+Barcode/Primer:
+  --primer PRIMER       IUPAC ambiguous primer to require
+  --no-primer           Do not use a primer.
+  --barcode-file BARCODE_FILE
+                        CSV file containing sample_id,barcode[,primer] in the
+                        rows. A single primer for all sequences may be
+                        specified with `--primer`, or `--no-primer` may be
+                        used to indicate barcodes should be used without a
+                        primer check.
+  --barcode-header      Barcodes have a header row [default: False]
+  --map-out SAMPLE_MAP  Path to write sequence_id,sample_id pairs
+  --quoting {QUOTE_ALL,QUOTE_MINIMAL,QUOTE_NONE,QUOTE_NONNUMERIC}
+                        A string naming an attribute of the csv module
+                        defining the quoting behavior for `SAMPLE_MAP`.
+                        [default: QUOTE_MINIMAL]
diff --git a/docs/quality_filter.rst b/docs/quality_filter.rst
index d731006..625a1e7 100644
--- a/docs/quality_filter.rst
+++ b/docs/quality_filter.rst
@@ -5,4 +5,4 @@
 quality criteria.  The subcommand takes a FASTA and quality score file, and
 writes the results to an output file:
 
-.. program-output:: ../seqmagick.py quality-filter -h
+.. literalinclude:: quality_filter.help
diff --git a/docs/regen_help.sh b/docs/regen_help.sh
new file mode 100755
index 0000000..76e1736
--- /dev/null
+++ b/docs/regen_help.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+# Regenerate each <subcommand>.help file in the current directory from `seqmagick <subcommand> --help`.
+subcommands="convert info primer-trim quality-filter extract-ids backtrans-align"
+# Redirect stdout to the file first and stderr second, so both streams land in the file.
+for cmd in $subcommands; do
+    seqmagick "$cmd" --help > "${cmd//-/_}.help" 2>&1
+done